diff options
847 files changed, 27694 insertions, 16374 deletions
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README index 1fe2d7fd4108..5e38e1582f95 100644 --- a/Documentation/arm/sunxi/README +++ b/Documentation/arm/sunxi/README @@ -36,7 +36,7 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf - - Allwinner A23 + - Allwinner A23 (sun8i) + Datasheet http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf + User Manual @@ -55,7 +55,23 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf + - Allwinner A33 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf + + User Manual + http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf + + - Allwinner H3 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf + * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs - Allwinner A80 + Datasheet http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf + + * Octa ARM Cortex-A7 based SoCs + - Allwinner A83T + + Not Supported + + Datasheet + http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt index 42941fdefb11..67da20539540 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.txt +++ b/Documentation/devicetree/bindings/arm/sunxi.txt @@ -9,4 +9,6 @@ using one of the following compatible strings: allwinner,sun6i-a31 allwinner,sun7i-a20 allwinner,sun8i-a23 + allwinner,sun8i-a33 + allwinner,sun8i-h3 allwinner,sun9i-a80 diff --git a/Documentation/devicetree/bindings/hwlock/hwlock.txt b/Documentation/devicetree/bindings/hwlock/hwlock.txt new file mode 100644 index 000000000000..085d1f5c916a --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/hwlock.txt @@ -0,0 +1,59 @@ +Generic hwlock bindings +======================= + +Generic bindings that are common to all the hwlock platform specific driver +implementations. + +Please also look through the individual platform specific hwlock binding +documentations for identifying any additional properties specific to that +platform. + +hwlock providers: +================= + +Required properties: +- #hwlock-cells: Specifies the number of cells needed to represent a + specific lock. + +hwlock users: +============= + +Consumers that require specific hwlock(s) should specify them using the +property "hwlocks", and an optional "hwlock-names" property. + +Required properties: +- hwlocks: List of phandle to a hwlock provider node and an + associated hwlock args specifier as indicated by + #hwlock-cells. The list can have just a single hwlock + or multiple hwlocks, with each hwlock represented by + a phandle and a corresponding args specifier. + +Optional properties: +- hwlock-names: List of hwlock name strings defined in the same order + as the hwlocks, with one name per hwlock. Consumers can + use the hwlock-names to match and get a specific hwlock. + + +1. Example of a node using a single specific hwlock: + +The following example has a node requesting a hwlock in the bank defined by +the node hwlock1. hwlock1 is a hwlock provider with an argument specifier +of length 1. + + node { + ... + hwlocks = <&hwlock1 2>; + ... + }; + +2. Example of a node using multiple specific hwlocks: + +The following example has a node requesting two hwlocks, a hwlock within +the hwlock device node 'hwlock1' with #hwlock-cells value of 1, and another +hwlock within the hwlock device node 'hwlock2' with #hwlock-cells value of 2. + + node { + ... + hwlocks = <&hwlock1 2>, <&hwlock2 0 3>; + ... + }; diff --git a/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt new file mode 100644 index 000000000000..2c9804f4f4ac --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/omap-hwspinlock.txt @@ -0,0 +1,26 @@ +OMAP4+ HwSpinlock Driver +======================== + +Required properties: +- compatible: Should be "ti,omap4-hwspinlock" for + OMAP44xx, OMAP54xx, AM33xx, AM43xx, DRA7xx SoCs +- reg: Contains the hwspinlock module register address space + (base address and length) +- ti,hwmods: Name of the hwmod associated with the hwspinlock device +- #hwlock-cells: Should be 1. The OMAP hwspinlock users will use a + 0-indexed relative hwlock number as the argument + specifier value for requesting a specific hwspinlock + within a hwspinlock bank. + +Please look at the generic hwlock binding for usage information for consumers, +"Documentation/devicetree/bindings/hwlock/hwlock.txt" + +Example: + +/* OMAP4 */ +hwspinlock: spinlock@4a0f6000 { + compatible = "ti,omap4-hwspinlock"; + reg = <0x4a0f6000 0x1000>; + ti,hwmods = "spinlock"; + #hwlock-cells = <1>; +}; diff --git a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt new file mode 100644 index 000000000000..4563f524556b --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.txt @@ -0,0 +1,39 @@ +Qualcomm Hardware Mutex Block: + +The hardware block provides mutexes utilized between different processors on +the SoC as part of the communication protocol used by these processors. + +- compatible: + Usage: required + Value type: <string> + Definition: must be one of: + "qcom,sfpb-mutex", + "qcom,tcsr-mutex" + +- syscon: + Usage: required + Value type: <prop-encoded-array> + Definition: one cell containing: + syscon phandle + offset of the hwmutex block within the syscon + stride of the hwmutex registers + +- #hwlock-cells: + Usage: required + Value type: <u32> + Definition: must be 1, the specified cell represent the lock id + (hwlock standard property, see hwlock.txt) + +Example: + + tcsr_mutex_block: syscon@fd484000 { + compatible = "syscon"; + reg = <0xfd484000 0x2000>; + }; + + hwlock@fd484000 { + compatible = "qcom,tcsr-mutex"; + syscon = <&tcsr_mutex_block 0 0x80>; + + #hwlock-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt b/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt new file mode 100644 index 000000000000..9bb1240a68e0 --- /dev/null +++ b/Documentation/devicetree/bindings/hwlock/sirf,hwspinlock.txt @@ -0,0 +1,28 @@ +SIRF Hardware spinlock device Binding +----------------------------------------------- + +Required properties : +- compatible : shall contain only one of the following: + "sirf,hwspinlock" + +- reg : the register address of hwspinlock + +- #hwlock-cells : hwlock users only use the hwlock id to represent a specific + hwlock, so the number of cells should be <1> here. + +Please look at the generic hwlock binding for usage information for consumers, +"Documentation/devicetree/bindings/hwlock/hwlock.txt" + +Example of hwlock provider: + hwlock { + compatible = "sirf,hwspinlock"; + reg = <0x13240000 0x00010000>; + #hwlock-cells = <1>; + }; + +Example of hwlock users: + node { + ... + hwlocks = <&hwlock 2>; + ... + }; diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt index 938f8e1ba205..0db60470ebb6 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt @@ -8,6 +8,7 @@ of the EMIF IP and memory parts attached to it. Required properties: - compatible : Should be of the form "ti,emif-<ip-rev>" where <ip-rev> is the IP revision of the specific EMIF instance. + For am437x should be ti,emif-am4372. - phy-type : <u32> indicating the DDR phy type. Following are the allowed values diff --git a/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt b/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt new file mode 100644 index 000000000000..3a70073797eb --- /dev/null +++ b/Documentation/devicetree/bindings/remoteproc/wkup_m3_rproc.txt @@ -0,0 +1,52 @@ +TI Wakeup M3 Remoteproc Driver +============================== + +The TI AM33xx and AM43xx family of devices use a small Cortex M3 co-processor +(commonly referred to as Wakeup M3 or CM3) to help with various low power tasks +that cannot be controlled from the MPU. This CM3 processor requires a firmware +binary to accomplish this. The wkup_m3 remoteproc driver handles the loading of +the firmware and booting of the CM3. + +Wkup M3 Device Node: +==================== +A wkup_m3 device node is used to represent the Wakeup M3 processor instance +within the SoC. It is added as a child node of the parent interconnect bus +(l4_wkup) through which it is accessible to the MPU. + +Required properties: +-------------------- +- compatible: Should be one of, + "ti,am3352-wkup-m3" for AM33xx SoCs + "ti,am4372-wkup-m3" for AM43xx SoCs +- reg: Should contain the address ranges for the two internal + memory regions, UMEM and DMEM. The parent node should + provide an appropriate ranges property for properly + translating these into bus addresses. +- reg-names: Contains the corresponding names for the two memory + regions. These should be named "umem" & "dmem". +- ti,hwmods: Name of the hwmod associated with the wkupm3 device. +- ti,pm-firmware: Name of firmware file to be used for loading and + booting the wkup_m3 remote processor. + +Example: +-------- +/* AM33xx */ +ocp { + l4_wkup: l4_wkup@44c00000 { + compatible = "am335-l4-wkup", "simple-bus"; + ranges = <0 0x44c00000 0x400000>; + #address-cells = <1>; + #size-cells = <1>; + + wkup_m3: wkup_m3@100000 { + compatible = "ti,am3352-wkup-m3"; + reg = <0x100000 0x4000>, + <0x180000 0x2000>; + reg-names = "umem", "dmem"; + ti,hwmods = "wkup_m3"; + ti,pm-firmware = "am335x-pm-firmware.elf"; + }; + }; + + ... +}; diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt index 1be8d7a26c15..5883b73ea1b5 100644 --- a/Documentation/devicetree/bindings/usb/atmel-usb.txt +++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt @@ -79,9 +79,9 @@ Atmel High-Speed USB device controller Required properties: - compatible: Should be one of the following - "at91sam9rl-udc" - "at91sam9g45-udc" - "sama5d3-udc" + "atmel,at91sam9rl-udc" + "atmel,at91sam9g45-udc" + "atmel,sama5d3-udc" - reg: Address and length of the register set for the device - interrupts: Should contain usba interrupt - clocks: Should reference the peripheral and host clocks diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 277d1e810670..c0bd5677271b 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -676,6 +676,29 @@ FS-Cache provides some utilities that a cache backend may make use of: as possible. + (*) Indicate that a stale object was found and discarded: + + void fscache_object_retrying_stale(struct fscache_object *object); + + This is called to indicate that the lookup procedure found an object in + the cache that the netfs decided was stale. The object has been + discarded from the cache and the lookup will be performed again. + + + (*) Indicate that the caching backend killed an object: + + void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + + This is called to indicate that the cache backend preemptively killed an + object. The why parameter should be set to indicate the reason: + + FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding. + FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space + FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished. + FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space. + + (*) Get and release references on a retrieval record: void fscache_get_retrieval(struct fscache_retrieval *op); diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 770267af5b3e..50f0a5757f48 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -284,8 +284,9 @@ proc files. enq=N Number of times async ops queued for processing can=N Number of async ops cancelled rej=N Number of async ops rejected due to object lookup/create failure + ini=N Number of async ops initialised dfr=N Number of async ops queued for deferred release - rel=N Number of async ops released + rel=N Number of async ops released (should equal ini=N when idle) gc=N Number of deferred-release async ops garbage collected CacheOp alo=N Number of in-progress alloc_object() cache ops luo=N Number of in-progress lookup_object() cache ops @@ -303,6 +304,10 @@ proc files. wrp=N Number of in-progress write_page() cache ops ucp=N Number of in-progress uncache_page() cache ops dsp=N Number of in-progress dissociate_pages() cache ops + CacheEv nsp=N Number of object lookups/creations rejected due to lack of space + stl=N Number of stale objects deleted + rtr=N Number of objects retired when relinquished + cul=N Number of objects culled (*) /proc/fs/fscache/histogram diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index baf41118660d..7af2851d667c 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -18,8 +18,10 @@ Usage ----- If you have a block device which supports DAX, you can make a filesystem -on it as usual. When mounting it, use the -o dax option manually -or add 'dax' to the options in /etc/fstab. +on it as usual. The DAX code currently only supports files with a block +size equal to your kernel's PAGE_SIZE, so you may need to specify a block +size when creating the filesystem. When mounting it, use the "-o dax" +option on the command line or add 'dax' to the options in /etc/fstab. Implementation Tips for Block Driver Writers diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 68f1c9106573..f24d1b833957 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -500,3 +500,7 @@ in your dentry operations instead. dentry, it does not get nameidata at all and it gets called only when cookie is non-NULL. Note that link body isn't available anymore, so if you need it, store it as cookie. +-- +[mandatory] + __fd_install() & fd_install() can now sleep. Callers should not + hold a spinlock or other resources that do not allow a schedule. diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches index 3d1bac399a22..d201828d202f 100644 --- a/Documentation/hwmon/submitting-patches +++ b/Documentation/hwmon/submitting-patches @@ -81,6 +81,13 @@ increase the chances of your change being accepted. * Provide a detect function if and only if a chip can be detected reliably. +* Only the following I2C addresses shall be probed: 0x18-0x1f, 0x28-0x2f, + 0x48-0x4f, 0x58, 0x5c, 0x73 and 0x77. Probing other addresses is strongly + discouraged as it is known to cause trouble with other (non-hwmon) I2C + chips. If your chip lives at an address which can't be probed then the + device will have to be instantiated explicitly (which is always better + anyway.) + * Avoid writing to chip registers in the detect function. If you have to write, only do it after you have already gathered enough data to be certain that the detection is going to be successful. diff --git a/Documentation/hwmon/w83792d b/Documentation/hwmon/w83792d index 53f7b6866fec..f2ffc402ea45 100644 --- a/Documentation/hwmon/w83792d +++ b/Documentation/hwmon/w83792d @@ -8,6 +8,7 @@ Supported chips: Datasheet: http://www.winbond.com.tw Author: Shane Huang (Winbond) +Updated: Roger Lucas Module Parameters @@ -38,9 +39,16 @@ parameter; this will put it into a more well-behaved state first. The driver implements three temperature sensors, seven fan rotation speed sensors, nine voltage sensors, and two automatic fan regulation strategies called: Smart Fan I (Thermal Cruise mode) and Smart Fan II. -Automatic fan control mode is possible only for fan1-fan3. Fan4-fan7 can run -synchronized with selected fan (fan1-fan3). This functionality and manual PWM -control for fan4-fan7 is not yet implemented. + +The driver also implements up to seven fan control outputs: pwm1-7. Pwm1-7 +can be configured to PWM output or Analogue DC output via their associated +pwmX_mode. Outputs pwm4 through pwm7 may or may not be present depending on +how the W83792AD/D was configured by the BIOS. + +Automatic fan control mode is possible only for fan1-fan3. + +For all pwmX outputs, a value of 0 means minimum fan speed and a value of +255 means maximum fan speed. Temperatures are measured in degrees Celsius and measurement resolution is 1 degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when @@ -157,14 +165,14 @@ for each fan. /sys files ---------- -pwm[1-3] - this file stores PWM duty cycle or DC value (fan speed) in range: +pwm[1-7] - this file stores PWM duty cycle or DC value (fan speed) in range: 0 (stop) to 255 (full) pwm[1-3]_enable - this file controls mode of fan/temperature control: * 0 Disabled * 1 Manual mode * 2 Smart Fan II * 3 Thermal Cruise -pwm[1-3]_mode - Select PWM of DC mode +pwm[1-7]_mode - Select PWM or DC mode * 0 DC * 1 PWM thermal_cruise[1-3] - Selects the desired temperature for cruise (degC) diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt index 62f7d4ea6e26..61c1ee98e59f 100644 --- a/Documentation/hwspinlock.txt +++ b/Documentation/hwspinlock.txt @@ -48,6 +48,16 @@ independent, drivers. ids for predefined purposes. Should be called from a process context (might sleep). + int of_hwspin_lock_get_id(struct device_node *np, int index); + - retrieve the global lock id for an OF phandle-based specific lock. + This function provides a means for DT users of a hwspinlock module + to get the global lock id of a specific hwspinlock, so that it can + be requested using the normal hwspin_lock_request_specific() API. + The function returns a lock id number on success, -EPROBE_DEFER if + the hwspinlock device is not yet registered with the core, or other + error values. + Should be called from a process context (might sleep). + int hwspin_lock_free(struct hwspinlock *hwlock); - free a previously-assigned hwspinlock; returns 0 on success, or an appropriate error code on failure (e.g. -EINVAL if the hwspinlock diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 51f4221657bf..611c52267d24 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -321,6 +321,7 @@ Code Seq#(hex) Include File Comments 0xDB 00-0F drivers/char/mwave/mwavepub.h 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ <mailto:aherrman@de.ibm.com> +0xE5 00-3F linux/fuse.h 0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver 0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development) <mailto:thomas@winischhofer.net> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index afe7e2bbbc23..1d6f0459cd7b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -293,6 +293,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS Format: To spoof as Windows 98: ="Microsoft Windows" + acpi_rev_override [ACPI] Override the _REV object to return 5 (instead + of 2 which is mandated by ACPI 6) as the supported ACPI + specification revision (when using this switch, it may + be necessary to carry out a cold reboot _twice_ in a + row to make it take effect on the platform firmware). + acpi_osi= [HW,ACPI] Modify list of supported OS interface strings acpi_osi="string1" # add string1 acpi_osi="!string2" # remove string2 diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt new file mode 100644 index 000000000000..1d9bbabb6c79 --- /dev/null +++ b/Documentation/ntb.txt @@ -0,0 +1,127 @@ +# NTB Drivers + +NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects +the separate memory systems of two computers to the same PCI-Express fabric. +Existing NTB hardware supports a common feature set, including scratchpad +registers, doorbell registers, and memory translation windows. Scratchpad +registers are read-and-writable registers that are accessible from either side +of the device, so that peers can exchange a small amount of information at a +fixed address. Doorbell registers provide a way for peers to send interrupt +events. Memory windows allow translated read and write access to the peer +memory. + +## NTB Core Driver (ntb) + +The NTB core driver defines an api wrapping the common feature set, and allows +clients interested in NTB features to discover NTB the devices supported by +hardware drivers. The term "client" is used here to mean an upper layer +component making use of the NTB api. The term "driver," or "hardware driver," +is used here to mean a driver for a specific vendor and model of NTB hardware. + +## NTB Client Drivers + +NTB client drivers should register with the NTB core driver. After +registering, the client probe and remove functions will be called appropriately +as ntb hardware, or hardware drivers, are inserted and removed. The +registration uses the Linux Device framework, so it should feel familiar to +anyone who has written a pci driver. + +### NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev) + +The primary client for NTB is the Transport client, used in tandem with NTB +Netdev. These drivers function together to create a logical link to the peer, +across the ntb, to exchange packets of network data. The Transport client +establishes a logical link to the peer, and creates queue pairs to exchange +messages and data. The NTB Netdev then creates an ethernet device using a +Transport queue pair. Network data is copied between socket buffers and the +Transport queue pair buffer. The Transport client may be used for other things +besides Netdev, however no other applications have yet been written. + +### NTB Ping Pong Test Client (ntb\_pingpong) + +The Ping Pong test client serves as a demonstration to exercise the doorbell +and scratchpad registers of NTB hardware, and as an example simple NTB client. +Ping Pong enables the link when started, waits for the NTB link to come up, and +then proceeds to read and write the doorbell scratchpad registers of the NTB. +The peers interrupt each other using a bit mask of doorbell bits, which is +shifted by one in each round, to test the behavior of multiple doorbell bits +and interrupt vectors. The Ping Pong driver also reads the first local +scratchpad, and writes the value plus one to the first peer scratchpad, each +round before writing the peer doorbell register. + +Module Parameters: + +* unsafe - Some hardware has known issues with scratchpad and doorbell + registers. By default, Ping Pong will not attempt to exercise such + hardware. You may override this behavior at your own risk by setting + unsafe=1. +* delay\_ms - Specify the delay between receiving a doorbell + interrupt event and setting the peer doorbell register for the next + round. +* init\_db - Specify the doorbell bits to start new series of rounds. A new + series begins once all the doorbell bits have been shifted out of + range. +* dyndbg - It is suggested to specify dyndbg=+p when loading this module, and + then to observe debugging output on the console. + +### NTB Tool Test Client (ntb\_tool) + +The Tool test client serves for debugging, primarily, ntb hardware and drivers. +The Tool provides access through debugfs for reading, setting, and clearing the +NTB doorbell, and reading and writing scratchpads. + +The Tool does not currently have any module parameters. + +Debugfs Files: + +* *debugfs*/ntb\_tool/*hw*/ - A directory in debugfs will be created for each + NTB device probed by the tool. This directory is shortened to *hw* + below. +* *hw*/db - This file is used to read, set, and clear the local doorbell. Not + all operations may be supported by all hardware. To read the doorbell, + read the file. To set the doorbell, write `s` followed by the bits to + set (eg: `echo 's 0x0101' > db`). To clear the doorbell, write `c` + followed by the bits to clear. +* *hw*/mask - This file is used to read, set, and clear the local doorbell mask. + See *db* for details. +* *hw*/peer\_db - This file is used to read, set, and clear the peer doorbell. + See *db* for details. +* *hw*/peer\_mask - This file is used to read, set, and clear the peer doorbell + mask. See *db* for details. +* *hw*/spad - This file is used to read and write local scratchpads. To read + the values of all scratchpads, read the file. To write values, write a + series of pairs of scratchpad number and value + (eg: `echo '4 0x123 7 0xabc' > spad` + # to set scratchpads `4` and `7` to `0x123` and `0xabc`, respectively). +* *hw*/peer\_spad - This file is used to read and write peer scratchpads. See + *spad* for details. + +## NTB Hardware Drivers + +NTB hardware drivers should register devices with the NTB core driver. After +registering, clients probe and remove functions will be called. + +### NTB Intel Hardware Driver (ntb\_hw\_intel) + +The Intel hardware driver supports NTB on Xeon and Atom CPUs. + +Module Parameters: + +* b2b\_mw\_idx - If the peer ntb is to be accessed via a memory window, then use + this memory window to access the peer ntb. A value of zero or positive + starts from the first mw idx, and a negative value starts from the last + mw idx. Both sides MUST set the same value here! The default value is + `-1`. +* b2b\_mw\_share - If the peer ntb is to be accessed via a memory window, and if + the memory window is large enough, still allow the client to use the + second half of the memory window for address translation to the peer. +* xeon\_b2b\_usd\_bar2\_addr64 - If using B2B topology on Xeon hardware, use + this 64 bit address on the bus between the NTB devices for the window + at BAR2, on the upstream side of the link. +* xeon\_b2b\_usd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_usd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar2\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr64 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar4\_addr32 - See *xeon\_b2b\_bar2\_addr64*. +* xeon\_b2b\_dsd\_bar5\_addr32 - See *xeon\_b2b\_bar2\_addr64*. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index f732a8321e8a..8cc17ca71813 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -410,8 +410,17 @@ Documentation/usb/persist.txt. Q: Can I suspend-to-disk using a swap partition under LVM? -A: No. You can suspend successfully, but you'll not be able to -resume. uswsusp should be able to work with LVM. See suspend.sf.net. +A: Yes and No. You can suspend successfully, but the kernel will not be able +to resume on its own. You need an initramfs that can recognize the resume +situation, activate the logical volume containing the swap volume (but not +touch any filesystems!), and eventually call + +echo -n "$major:$minor" > /sys/power/resume + +where $major and $minor are the respective major and minor device numbers of +the swap volume. + +uswsusp works with LVM, too. See http://suspend.sourceforge.net/ Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt index e6469fdcf89a..ef0219fa4bb4 100644 --- a/Documentation/remoteproc.txt +++ b/Documentation/remoteproc.txt @@ -51,6 +51,12 @@ cost. rproc_shutdown() returns, and users can still use it with a subsequent rproc_boot(), if needed. + struct rproc *rproc_get_by_phandle(phandle phandle) + - Find an rproc handle using a device tree phandle. Returns the rproc + handle on success, and NULL on failure. This function increments + the remote processor's refcount, so always use rproc_put() to + decrement it back once rproc isn't needed anymore. + 3. Typical usage #include <linux/remoteproc.h> diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 6085e1f19c9d..949de191fcdc 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -50,15 +50,6 @@ def tcm_mod_build_FC_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* Binary World Wide unique Port Name for FC Initiator Nport */\n" - buf += " u64 nport_wwpn;\n" - buf += " /* ASCII formatted WWPN for FC Initiator Nport */\n" - buf += " char nport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n" - buf += "\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* FC lport target portal group tag for TCM */\n" buf += " u16 lport_tpgt;\n" @@ -69,8 +60,6 @@ def tcm_mod_build_FC_include(fabric_mod_dir_var, fabric_mod_name): buf += "};\n" buf += "\n" buf += "struct " + fabric_mod_name + "_lport {\n" - buf += " /* SCSI protocol the lport is providing */\n" - buf += " u8 lport_proto_id;\n" buf += " /* Binary World Wide unique Port Name for FC Target Lport */\n" buf += " u64 lport_wwpn;\n" buf += " /* ASCII formatted WWPN for FC Target Lport */\n" @@ -105,14 +94,6 @@ def tcm_mod_build_SAS_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* Binary World Wide unique Port Name for SAS Initiator port */\n" - buf += " u64 iport_wwpn;\n" - buf += " /* ASCII formatted WWPN for Sas Initiator port */\n" - buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* SAS port target portal group tag for TCM */\n" buf += " u16 tport_tpgt;\n" @@ -122,8 +103,6 @@ def tcm_mod_build_SAS_include(fabric_mod_dir_var, fabric_mod_name): buf += " struct se_portal_group se_tpg;\n" buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tport {\n" - buf += " /* SCSI protocol the tport is providing */\n" - buf += " u8 tport_proto_id;\n" buf += " /* Binary World Wide unique Port Name for SAS Target port */\n" buf += " u64 tport_wwpn;\n" buf += " /* ASCII formatted WWPN for SAS Target port */\n" @@ -158,12 +137,6 @@ def tcm_mod_build_iSCSI_include(fabric_mod_dir_var, fabric_mod_name): buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n" buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n" buf += "\n" - buf += "struct " + fabric_mod_name + "_nacl {\n" - buf += " /* ASCII formatted InitiatorName */\n" - buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" - buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n" - buf += " struct se_node_acl se_node_acl;\n" - buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tpg {\n" buf += " /* iSCSI target portal group tag for TCM */\n" buf += " u16 tport_tpgt;\n" @@ -173,8 +146,6 @@ def tcm_mod_build_iSCSI_include(fabric_mod_dir_var, fabric_mod_name): buf += " struct se_portal_group se_tpg;\n" buf += "};\n\n" buf += "struct " + fabric_mod_name + "_tport {\n" - buf += " /* SCSI protocol the tport is providing */\n" - buf += " u8 tport_proto_id;\n" buf += " /* ASCII formatted TargetName for IQN */\n" buf += " char tport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n" buf += " /* Returned by " + fabric_mod_name + "_make_tport() */\n" @@ -232,61 +203,12 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include <target/target_core_base.h>\n" buf += "#include <target/target_core_fabric.h>\n" buf += "#include <target/target_core_fabric_configfs.h>\n" - buf += "#include <target/target_core_configfs.h>\n" buf += "#include <target/configfs_macros.h>\n\n" buf += "#include \"" + fabric_mod_name + "_base.h\"\n" buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n" buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops;\n\n" - buf += "static struct se_node_acl *" + fabric_mod_name + "_make_nodeacl(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct config_group *group,\n" - buf += " const char *name)\n" - buf += "{\n" - buf += " struct se_node_acl *se_nacl, *se_nacl_new;\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl;\n" - - if proto_ident == "FC" or proto_ident == "SAS": - buf += " u64 wwpn = 0;\n" - - buf += " u32 nexus_depth;\n\n" - buf += " /* " + fabric_mod_name + "_parse_wwn(name, &wwpn, 1) < 0)\n" - buf += " return ERR_PTR(-EINVAL); */\n" - buf += " se_nacl_new = " + fabric_mod_name + "_alloc_fabric_acl(se_tpg);\n" - buf += " if (!se_nacl_new)\n" - buf += " return ERR_PTR(-ENOMEM);\n" - buf += "//#warning FIXME: Hardcoded nexus depth in " + fabric_mod_name + "_make_nodeacl()\n" - buf += " nexus_depth = 1;\n" - buf += " /*\n" - buf += " * se_nacl_new may be released by core_tpg_add_initiator_node_acl()\n" - buf += " * when converting a NodeACL from demo mode -> explict\n" - buf += " */\n" - buf += " se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,\n" - buf += " name, nexus_depth);\n" - buf += " if (IS_ERR(se_nacl)) {\n" - buf += " " + fabric_mod_name + "_release_fabric_acl(se_tpg, se_nacl_new);\n" - buf += " return se_nacl;\n" - buf += " }\n" - buf += " /*\n" - buf += " * Locate our struct " + fabric_mod_name + "_nacl and set the FC Nport WWPN\n" - buf += " */\n" - buf += " nacl = container_of(se_nacl, struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - - if proto_ident == "FC" or proto_ident == "SAS": - buf += " nacl->" + fabric_mod_init_port + "_wwpn = wwpn;\n" - - buf += " /* " + fabric_mod_name + "_format_wwn(&nacl->" + fabric_mod_init_port + "_name[0], " + fabric_mod_name.upper() + "_NAMELEN, wwpn); */\n\n" - buf += " return se_nacl;\n" - buf += "}\n\n" - buf += "static void " + fabric_mod_name + "_drop_nodeacl(struct se_node_acl *se_acl)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_acl,\n" - buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - buf += " core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);\n" - buf += " kfree(nacl);\n" - buf += "}\n\n" - buf += "static struct se_portal_group *" + fabric_mod_name + "_make_tpg(\n" buf += " struct se_wwn *wwn,\n" buf += " struct config_group *group,\n" @@ -309,8 +231,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n" buf += " tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n" buf += " ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n" - buf += " &tpg->se_tpg, tpg,\n" - buf += " TRANSPORT_TPG_TYPE_NORMAL);\n" + buf += " &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" buf += " if (ret < 0) {\n" buf += " kfree(tpg);\n" buf += " return NULL;\n" @@ -372,21 +293,13 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n" buf += " .module = THIS_MODULE,\n" buf += " .name = " + fabric_mod_name + ",\n" - buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n" buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n" - buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" - buf += " .tpg_get_default_depth = " + fabric_mod_name + "_get_default_depth,\n" - buf += " .tpg_get_pr_transport_id = " + fabric_mod_name + "_get_pr_transport_id,\n" - buf += " .tpg_get_pr_transport_id_len = " + fabric_mod_name + "_get_pr_transport_id_len,\n" - buf += " .tpg_parse_pr_out_transport_id = " + fabric_mod_name + "_parse_pr_out_transport_id,\n" buf += " .tpg_check_demo_mode = " + fabric_mod_name + "_check_false,\n" buf += " .tpg_check_demo_mode_cache = " + fabric_mod_name + "_check_true,\n" buf += " .tpg_check_demo_mode_write_protect = " + fabric_mod_name + "_check_true,\n" buf += " .tpg_check_prod_mode_write_protect = " + fabric_mod_name + "_check_false,\n" - buf += " .tpg_alloc_fabric_acl = " + fabric_mod_name + "_alloc_fabric_acl,\n" - buf += " .tpg_release_fabric_acl = " + fabric_mod_name + "_release_fabric_acl,\n" buf += " .tpg_get_inst_index = " + fabric_mod_name + "_tpg_get_inst_index,\n" buf += " .release_cmd = " + fabric_mod_name + "_release_cmd,\n" buf += " .shutdown_session = " + fabric_mod_name + "_shutdown_session,\n" @@ -396,7 +309,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .write_pending = " + fabric_mod_name + "_write_pending,\n" buf += " .write_pending_status = " + fabric_mod_name + "_write_pending_status,\n" buf += " .set_default_node_attributes = " + fabric_mod_name + "_set_default_node_attrs,\n" - buf += " .get_task_tag = " + fabric_mod_name + "_get_task_tag,\n" buf += " .get_cmd_state = " + fabric_mod_name + "_get_cmd_state,\n" buf += " .queue_data_in = " + fabric_mod_name + "_queue_data_in,\n" buf += " .queue_status = " + fabric_mod_name + "_queue_status,\n" @@ -409,12 +321,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .fabric_drop_wwn = " + fabric_mod_name + "_drop_" + fabric_mod_port + ",\n" buf += " .fabric_make_tpg = " + fabric_mod_name + "_make_tpg,\n" buf += " .fabric_drop_tpg = " + fabric_mod_name + "_drop_tpg,\n" - buf += " .fabric_post_link = NULL,\n" - buf += " .fabric_pre_unlink = NULL,\n" - buf += " .fabric_make_np = NULL,\n" - buf += " .fabric_drop_np = NULL,\n" - buf += " .fabric_make_nodeacl = " + fabric_mod_name + "_make_nodeacl,\n" - buf += " .fabric_drop_nodeacl = " + fabric_mod_name + "_drop_nodeacl,\n" buf += "\n" buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs;\n" buf += "};\n\n" @@ -507,7 +413,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include <scsi/scsi_proto.h>\n" buf += "#include <target/target_core_base.h>\n" buf += "#include <target/target_core_fabric.h>\n" - buf += "#include <target/target_core_configfs.h>\n\n" buf += "#include \"" + fabric_mod_name + "_base.h\"\n" buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n" @@ -539,35 +444,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): bufi += "char *" + fabric_mod_name + "_get_fabric_name(void);\n" continue - if re.search('get_fabric_proto_ident', fo): - buf += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " u8 proto_id;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " proto_id = fc_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " proto_id = sas_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " proto_id = iscsi_get_fabric_proto_ident(se_tpg);\n" - buf += " break;\n" - - buf += " }\n\n" - buf += " return proto_id;\n" - buf += "}\n\n" - bufi += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *);\n" - if re.search('get_wwn', fo): buf += "char *" + fabric_mod_name + "_get_fabric_wwn(struct se_portal_group *se_tpg)\n" buf += "{\n" @@ -587,150 +463,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "u16 " + fabric_mod_name + "_get_tag(struct se_portal_group *);\n" - if re.search('get_default_depth', fo): - buf += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " return 1;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *);\n" - - if re.search('get_pr_transport_id\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_pr_transport_id(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl,\n" - buf += " struct t10_pr_registration *pr_reg,\n" - buf += " int *format_code,\n" - buf += " unsigned char *buf)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " int ret = 0;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " ret = fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " ret = sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " ret = iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code, buf);\n" - buf += " break;\n" - - buf += " }\n\n" - buf += " return ret;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id(struct se_portal_group *,\n" - bufi += " struct se_node_acl *, struct t10_pr_registration *,\n" - bufi += " int *, unsigned char *);\n" - - if re.search('get_pr_transport_id_len\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl,\n" - buf += " struct t10_pr_registration *pr_reg,\n" - buf += " int *format_code)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " int ret = 0;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " ret = fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " ret = sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " ret = iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n" - buf += " format_code);\n" - buf += " break;\n" - - - buf += " }\n\n" - buf += " return ret;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(struct se_portal_group *,\n" - bufi += " struct se_node_acl *, struct t10_pr_registration *,\n" - bufi += " int *);\n" - - if re.search('parse_pr_out_transport_id\)\(', fo): - buf += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " const char *buf,\n" - buf += " u32 *out_tid_len,\n" - buf += " char **port_nexus_ptr)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n" - buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n" - buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n" - buf += " char *tid = NULL;\n\n" - buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n" - if proto_ident == "FC": - buf += " case SCSI_PROTOCOL_FCP:\n" - buf += " default:\n" - buf += " tid = fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - elif proto_ident == "SAS": - buf += " case SCSI_PROTOCOL_SAS:\n" - buf += " default:\n" - buf += " tid = sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - elif proto_ident == "iSCSI": - buf += " case SCSI_PROTOCOL_ISCSI:\n" - buf += " default:\n" - buf += " tid = iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n" - buf += " port_nexus_ptr);\n" - - buf += " }\n\n" - buf += " return tid;\n" - buf += "}\n\n" - bufi += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(struct se_portal_group *,\n" - bufi += " const char *, u32 *, char **);\n" - - if re.search('alloc_fabric_acl\)\(', fo): - buf += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *se_tpg)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl;\n\n" - buf += " nacl = kzalloc(sizeof(struct " + fabric_mod_name + "_nacl), GFP_KERNEL);\n" - buf += " if (!nacl) {\n" - buf += " printk(KERN_ERR \"Unable to allocate struct " + fabric_mod_name + "_nacl\\n\");\n" - buf += " return NULL;\n" - buf += " }\n\n" - buf += " return &nacl->se_node_acl;\n" - buf += "}\n\n" - bufi += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *);\n" - - if re.search('release_fabric_acl\)\(', fo): - buf += "void " + fabric_mod_name + "_release_fabric_acl(\n" - buf += " struct se_portal_group *se_tpg,\n" - buf += " struct se_node_acl *se_nacl)\n" - buf += "{\n" - buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_nacl,\n" - buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n" - buf += " kfree(nacl);\n" - buf += "}\n\n" - bufi += "void " + fabric_mod_name + "_release_fabric_acl(struct se_portal_group *,\n" - bufi += " struct se_node_acl *);\n" - if re.search('tpg_get_inst_index\)\(', fo): buf += "u32 " + fabric_mod_name + "_tpg_get_inst_index(struct se_portal_group *se_tpg)\n" buf += "{\n" @@ -787,13 +519,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "void " + fabric_mod_name + "_set_default_node_attrs(struct se_node_acl *);\n" - if re.search('get_task_tag\)\(', fo): - buf += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *se_cmd)\n" - buf += "{\n" - buf += " return 0;\n" - buf += "}\n\n" - bufi += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *);\n" - if re.search('get_cmd_state\)\(', fo): buf += "int " + fabric_mod_name + "_get_cmd_state(struct se_cmd *se_cmd)\n" buf += "{\n" diff --git a/Documentation/target/tcm_mod_builder.txt b/Documentation/target/tcm_mod_builder.txt index 84533d8e747f..ae22f7005540 100644 --- a/Documentation/target/tcm_mod_builder.txt +++ b/Documentation/target/tcm_mod_builder.txt @@ -13,8 +13,8 @@ fabric skeleton, by simply using: This script will create a new drivers/target/$TCM_NEW_MOD/, and will do the following *) Generate new API callers for drivers/target/target_core_fabric_configs.c logic - ->make_nodeacl(), ->drop_nodeacl(), ->make_tpg(), ->drop_tpg() - ->make_wwn(), ->drop_wwn(). These are created into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c + ->make_tpg(), ->drop_tpg(), ->make_wwn(), ->drop_wwn(). These are created + into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c *) Generate basic infrastructure for loading/unloading LKMs and TCM/ConfigFS fabric module using a skeleton struct target_core_fabric_ops API template. *) Based on user defined T10 Proto_Ident for the new fabric module being built, diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt index 263b907517ac..bef81e42788f 100644 --- a/Documentation/target/tcmu-design.txt +++ b/Documentation/target/tcmu-design.txt @@ -152,7 +152,7 @@ overall shared memory region, not the entry. The data in/out buffers are accessible via tht req.iov[] array. iov_cnt contains the number of entries in iov[] needed to describe either the Data-In or Data-Out buffers. For bidirectional commands, iov_cnt specifies how many iovec -entries cover the Data-Out area, and iov_bidi_count specifies how many +entries cover the Data-Out area, and iov_bidi_cnt specifies how many iovec entries immediately after that in iov[] cover the Data-In area. Just like other fields, iov.iov_base is an offset from the start of the region. diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 7c1f9fad6674..9da6f3512249 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -406,7 +406,7 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. - Bit 1 (kernel internal): ALSR_FLAG + Bit 1 (kernel internal): KASLR_FLAG - Used internally by the compressed kernel to communicate KASLR status to kernel proper. If 1, KASLR enabled. diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt index 33884d156125..c1df8eba9dfd 100644 --- a/Documentation/x86/entry_64.txt +++ b/Documentation/x86/entry_64.txt @@ -1,14 +1,14 @@ This file documents some of the kernel entries in -arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from +arch/x86/entry/entry_64.S. A lot of this explanation is adapted from an email from Ingo Molnar: http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu> The x86 architecture has quite a few different ways to jump into kernel code. Most of these entry points are registered in -arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S -for 64-bit, arch/x86/kernel/entry_32.S for 32-bit and finally -arch/x86/ia32/ia32entry.S which implements the 32-bit compatibility +arch/x86/kernel/traps.c and implemented in arch/x86/entry/entry_64.S +for 64-bit, arch/x86/entry/entry_32.S for 32-bit and finally +arch/x86/entry/entry_64_compat.S which implements the 32-bit compatibility syscall entry points and thus provides for 32-bit processes the ability to execute syscalls when running on 64-bit kernels. diff --git a/MAINTAINERS b/MAINTAINERS index 8183b4659a4d..2d3d55c8f5be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/vexpress* +F: arch/arm64/boot/dts/arm/vexpress* F: arch/arm/mach-vexpress/ F: */*/vexpress* F: */*/*/vexpress* @@ -2562,19 +2563,31 @@ F: arch/powerpc/include/uapi/asm/spu*.h F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ -CEPH DISTRIBUTED FILE SYSTEM CLIENT +CEPH COMMON CODE (LIBCEPH) +M: Ilya Dryomov <idryomov@gmail.com> M: "Yan, Zheng" <zyan@redhat.com> M: Sage Weil <sage@redhat.com> L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported -F: Documentation/filesystems/ceph.txt -F: fs/ceph/ F: net/ceph/ F: include/linux/ceph/ F: include/linux/crush/ +CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) +M: "Yan, Zheng" <zyan@redhat.com> +M: Sage Weil <sage@redhat.com> +M: Ilya Dryomov <idryomov@gmail.com> +L: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git +S: Supported +F: Documentation/filesystems/ceph.txt +F: fs/ceph/ + CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org S: Orphan @@ -4430,9 +4443,11 @@ FUSE: FILESYSTEM IN USERSPACE M: Miklos Szeredi <miklos@szeredi.hu> L: fuse-devel@lists.sourceforge.net W: http://fuse.sourceforge.net/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git S: Maintained F: fs/fuse/ F: include/uapi/linux/fuse.h +F: Documentation/filesystems/fuse.txt FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) M: Rik Faith <faith@cs.unc.edu> @@ -5468,6 +5483,13 @@ F: include/linux/mei_cl_bus.h F: drivers/misc/mei/* F: Documentation/misc-devices/mei/* +INTEL PMC IPC DRIVER +M: Zha Qipeng<qipeng.zha@intel.com> +L: platform-driver-x86@vger.kernel.org +S: Maintained +F: drivers/platform/x86/intel_pmc_ipc.c +F: arch/x86/include/asm/intel_pmc_ipc.h + IOC3 ETHERNET DRIVER M: Ralf Baechle <ralf@linux-mips.org> L: linux-mips@linux-mips.org @@ -6138,6 +6160,7 @@ L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem.c +F: include/linux/pmem.h LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras <paulus@au.ibm.com> @@ -6152,7 +6175,7 @@ M: Michael Ellerman <mpe@ellerman.id.au> W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git S: Supported F: Documentation/powerpc/ F: arch/powerpc/ @@ -7211,15 +7234,25 @@ F: drivers/power/bq27x00_battery.c F: drivers/power/isp1704_charger.c F: drivers/power/rx51_battery.c -NTB DRIVER +NTB DRIVER CORE M: Jon Mason <jdmason@kudzu.us> M: Dave Jiang <dave.jiang@intel.com> +M: Allen Hubbe <Allen.Hubbe@emc.com> S: Supported W: https://github.com/jonmason/ntb/wiki T: git git://github.com/jonmason/ntb.git F: drivers/ntb/ F: drivers/net/ntb_netdev.c F: include/linux/ntb.h +F: include/linux/ntb_transport.h + +NTB INTEL DRIVER +M: Jon Mason <jdmason@kudzu.us> +M: Dave Jiang <dave.jiang@intel.com> +S: Supported +W: https://github.com/jonmason/ntb/wiki +T: git git://github.com/jonmason/ntb.git +F: drivers/ntb/hw/intel/ NTFS FILESYSTEM M: Anton Altaparmakov <anton@tuxera.com> @@ -7363,7 +7396,6 @@ M: Ohad Ben-Cohen <ohad@wizery.com> L: linux-omap@vger.kernel.org S: Maintained F: drivers/hwspinlock/omap_hwspinlock.c -F: arch/arm/mach-omap2/hwspinlock.c OMAP MMC SUPPORT M: Jarkko Lavinen <jarkko.lavinen@nokia.com> @@ -8349,10 +8381,12 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov <idryomov@gmail.com> M: Sage Weil <sage@redhat.com> M: Alex Elder <elder@kernel.org> -M: ceph-devel@vger.kernel.org +L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported +F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h @@ -1,7 +1,7 @@ VERSION = 4 -PATCHLEVEL = 1 +PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* @@ -335,15 +335,6 @@ endif export KBUILD_MODULES KBUILD_BUILTIN export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD -ifneq ($(CC),) -ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) -COMPILER := clang -else -COMPILER := gcc -endif -export COMPILER -endif - # We need some generic definitions (do not try to remake the file). scripts/Kbuild.include: ; include scripts/Kbuild.include @@ -670,6 +661,13 @@ endif endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) +COMPILER := clang +else +COMPILER := gcc +endif +export COMPILER + ifeq ($(COMPILER),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e0cf99893212..807f7d61d7a7 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -71,15 +71,12 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf) mmput(mm); if (exe_file) { - path = exe_file->f_path; - path_get(&exe_file->f_path); + path_nm = file_path(exe_file, buf, 255); fput(exe_file); - path_nm = d_path(&path, buf, 255); - path_put(&path); } done: - pr_info("Path: %s\n", path_nm); + pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?"); } static void show_faulting_vma(unsigned long address, char *buf) @@ -103,8 +100,7 @@ static void show_faulting_vma(unsigned long address, char *buf) if (vma && (vma->vm_start <= address)) { struct file *file = vma->vm_file; if (file) { - struct path *path = &file->f_path; - nm = d_path(path, buf, PAGE_SIZE - 1); + nm = file_path(file, buf, PAGE_SIZE - 1); inode = file_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..1c5021002fe4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1693,6 +1693,12 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM + help + The VM uses one page of physical memory for each page table. + For systems with a lot of processes, this can use a lot of + precious low memory, eventually leading to low memory being + consumed by page tables. Setting this option will allow + user-space 2nd level page tables to reside in high memory. config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index f1b157971366..a2e16f940394 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1635,7 +1635,7 @@ config PID_IN_CONTEXTIDR config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES + depends on MODULES && MMU ---help--- This option helps catch unintended modifications to loadable kernel module's text and read-only data. It also prevents execution diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 901739fcb85a..5c42d259fa68 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,3 +80,7 @@ status = "okay"; }; }; + +&rtc { + system-power-controller; +}; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index c80a3e233792..ade28c790f4b 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -132,6 +132,12 @@ }; }; + emif: emif@4c000000 { + compatible = "ti,emif-am4372"; + reg = <0x4c000000 0x1000000>; + ti,hwmods = "emif"; + }; + edma: edma@49000000 { compatible = "ti,edma3"; ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2"; @@ -941,6 +947,7 @@ ti,hwmods = "dss_rfbi"; clocks = <&disp_clk>; clock-names = "fck"; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index a42cc377a862..a63bf78191ea 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -605,6 +605,10 @@ phy-supply = <&ldousb_reg>; }; +&usb2_phy2 { + phy-supply = <&ldousb_reg>; +}; + &usb1 { dr_mode = "host"; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 0854d4493da7..3de9b761cc1a 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -305,7 +305,7 @@ spi0_pins: spi0-pins { marvell,pins = "mpp36", "mpp37", "mpp38", "mpp39"; - marvell,function = "spi"; + marvell,function = "spi0"; }; uart2_pins: uart2-pins { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index d260ba779ae5..18177f5a7464 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1148,7 +1148,7 @@ usb2: gadget@fff78000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00600000 0x80000 0xfff78000 0x400>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 7521bdf17ef2..b6c8df8d380e 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1108,7 +1108,7 @@ usb2: gadget@f803c000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,at91sam9g45-udc"; reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi index 5dfd3a44bf82..3e21311f9514 100644 --- a/arch/arm/boot/dts/atlas7.dtsi +++ b/arch/arm/boot/dts/atlas7.dtsi @@ -135,6 +135,1025 @@ compatible = "sirf,atlas7-ioc"; reg = <0x18880000 0x1000>, <0x10E40000 0x1000>; + + audio_ac97_pmx: audio_ac97@0 { + audio_ac97 { + groups = "audio_ac97_grp"; + function = "audio_ac97"; + }; + }; + + audio_func_dbg_pmx: audio_func_dbg@0 { + audio_func_dbg { + groups = "audio_func_dbg_grp"; + function = "audio_func_dbg"; + }; + }; + + audio_i2s_pmx: audio_i2s@0 { + audio_i2s { + groups = "audio_i2s_grp"; + function = "audio_i2s"; + }; + }; + + audio_i2s_2ch_pmx: audio_i2s_2ch@0 { + audio_i2s_2ch { + groups = "audio_i2s_2ch_grp"; + function = "audio_i2s_2ch"; + }; + }; + + audio_i2s_extclk_pmx: audio_i2s_extclk@0 { + audio_i2s_extclk { + groups = "audio_i2s_extclk_grp"; + function = "audio_i2s_extclk"; + }; + }; + + audio_uart0_pmx: audio_uart0@0 { + audio_uart0 { + groups = "audio_uart0_grp"; + function = "audio_uart0"; + }; + }; + + audio_uart1_pmx: audio_uart1@0 { + audio_uart1 { + groups = "audio_uart1_grp"; + function = "audio_uart1"; + }; + }; + + audio_uart2_pmx0: audio_uart2@0 { + audio_uart2_0 { + groups = "audio_uart2_grp0"; + function = "audio_uart2_m0"; + }; + }; + + audio_uart2_pmx1: audio_uart2@1 { + audio_uart2_1 { + groups = "audio_uart2_grp1"; + function = "audio_uart2_m1"; + }; + }; + + c_can_trnsvr_pmx: c_can_trnsvr@0 { + c_can_trnsvr { + groups = "c_can_trnsvr_grp"; + function = "c_can_trnsvr"; + }; + }; + + c0_can_pmx0: c0_can@0 { + c0_can_0 { + groups = "c0_can_grp0"; + function = "c0_can_m0"; + }; + }; + + c0_can_pmx1: c0_can@1 { + c0_can_1 { + groups = "c0_can_grp1"; + function = "c0_can_m1"; + }; + }; + + c1_can_pmx0: c1_can@0 { + c1_can_0 { + groups = "c1_can_grp0"; + function = "c1_can_m0"; + }; + }; + + c1_can_pmx1: c1_can@1 { + c1_can_1 { + groups = "c1_can_grp1"; + function = "c1_can_m1"; + }; + }; + + c1_can_pmx2: c1_can@2 { + c1_can_2 { + groups = "c1_can_grp2"; + function = "c1_can_m2"; + }; + }; + + ca_audio_lpc_pmx: ca_audio_lpc@0 { + ca_audio_lpc { + groups = "ca_audio_lpc_grp"; + function = "ca_audio_lpc"; + }; + }; + + ca_bt_lpc_pmx: ca_bt_lpc@0 { + ca_bt_lpc { + groups = "ca_bt_lpc_grp"; + function = "ca_bt_lpc"; + }; + }; + + ca_coex_pmx: ca_coex@0 { + ca_coex { + groups = "ca_coex_grp"; + function = "ca_coex"; + }; + }; + + ca_curator_lpc_pmx: ca_curator_lpc@0 { + ca_curator_lpc { + groups = "ca_curator_lpc_grp"; + function = "ca_curator_lpc"; + }; + }; + + ca_pcm_debug_pmx: ca_pcm_debug@0 { + ca_pcm_debug { + groups = "ca_pcm_debug_grp"; + function = "ca_pcm_debug"; + }; + }; + + ca_pio_pmx: ca_pio@0 { + ca_pio { + groups = "ca_pio_grp"; + function = "ca_pio"; + }; + }; + + ca_sdio_debug_pmx: ca_sdio_debug@0 { + ca_sdio_debug { + groups = "ca_sdio_debug_grp"; + function = "ca_sdio_debug"; + }; + }; + + ca_spi_pmx: ca_spi@0 { + ca_spi { + groups = "ca_spi_grp"; + function = "ca_spi"; + }; + }; + + ca_trb_pmx: ca_trb@0 { + ca_trb { + groups = "ca_trb_grp"; + function = "ca_trb"; + }; + }; + + ca_uart_debug_pmx: ca_uart_debug@0 { + ca_uart_debug { + groups = "ca_uart_debug_grp"; + function = "ca_uart_debug"; + }; + }; + + clkc_pmx0: clkc@0 { + clkc_0 { + groups = "clkc_grp0"; + function = "clkc_m0"; + }; + }; + + clkc_pmx1: clkc@1 { + clkc_1 { + groups = "clkc_grp1"; + function = "clkc_m1"; + }; + }; + + gn_gnss_i2c_pmx: gn_gnss_i2c@0 { + gn_gnss_i2c { + groups = "gn_gnss_i2c_grp"; + function = "gn_gnss_i2c"; + }; + }; + + gn_gnss_uart_nopause_pmx: gn_gnss_uart_nopause@0 { + gn_gnss_uart_nopause { + groups = "gn_gnss_uart_nopause_grp"; + function = "gn_gnss_uart_nopause"; + }; + }; + + gn_gnss_uart_pmx: gn_gnss_uart@0 { + gn_gnss_uart { + groups = "gn_gnss_uart_grp"; + function = "gn_gnss_uart"; + }; + }; + + gn_trg_spi_pmx0: gn_trg_spi@0 { + gn_trg_spi_0 { + groups = "gn_trg_spi_grp0"; + function = "gn_trg_spi_m0"; + }; + }; + + gn_trg_spi_pmx1: gn_trg_spi@1 { + gn_trg_spi_1 { + groups = "gn_trg_spi_grp1"; + function = "gn_trg_spi_m1"; + }; + }; + + cvbs_dbg_pmx: cvbs_dbg@0 { + cvbs_dbg { + groups = "cvbs_dbg_grp"; + function = "cvbs_dbg"; + }; + }; + + cvbs_dbg_test_pmx0: cvbs_dbg_test@0 { + cvbs_dbg_test_0 { + groups = "cvbs_dbg_test_grp0"; + function = "cvbs_dbg_test_m0"; + }; + }; + + cvbs_dbg_test_pmx1: cvbs_dbg_test@1 { + cvbs_dbg_test_1 { + groups = "cvbs_dbg_test_grp1"; + function = "cvbs_dbg_test_m1"; + }; + }; + + cvbs_dbg_test_pmx2: cvbs_dbg_test@2 { + cvbs_dbg_test_2 { + groups = "cvbs_dbg_test_grp2"; + function = "cvbs_dbg_test_m2"; + }; + }; + + cvbs_dbg_test_pmx3: cvbs_dbg_test@3 { + cvbs_dbg_test_3 { + groups = "cvbs_dbg_test_grp3"; + function = "cvbs_dbg_test_m3"; + }; + }; + + cvbs_dbg_test_pmx4: cvbs_dbg_test@4 { + cvbs_dbg_test_4 { + groups = "cvbs_dbg_test_grp4"; + function = "cvbs_dbg_test_m4"; + }; + }; + + cvbs_dbg_test_pmx5: cvbs_dbg_test@5 { + cvbs_dbg_test_5 { + groups = "cvbs_dbg_test_grp5"; + function = "cvbs_dbg_test_m5"; + }; + }; + + cvbs_dbg_test_pmx6: cvbs_dbg_test@6 { + cvbs_dbg_test_6 { + groups = "cvbs_dbg_test_grp6"; + function = "cvbs_dbg_test_m6"; + }; + }; + + cvbs_dbg_test_pmx7: cvbs_dbg_test@7 { + cvbs_dbg_test_7 { + groups = "cvbs_dbg_test_grp7"; + function = "cvbs_dbg_test_m7"; + }; + }; + + cvbs_dbg_test_pmx8: cvbs_dbg_test@8 { + cvbs_dbg_test_8 { + groups = "cvbs_dbg_test_grp8"; + function = "cvbs_dbg_test_m8"; + }; + }; + + cvbs_dbg_test_pmx9: cvbs_dbg_test@9 { + cvbs_dbg_test_9 { + groups = "cvbs_dbg_test_grp9"; + function = "cvbs_dbg_test_m9"; + }; + }; + + cvbs_dbg_test_pmx10: cvbs_dbg_test@10 { + cvbs_dbg_test_10 { + groups = "cvbs_dbg_test_grp10"; + function = "cvbs_dbg_test_m10"; + }; + }; + + cvbs_dbg_test_pmx11: cvbs_dbg_test@11 { + cvbs_dbg_test_11 { + groups = "cvbs_dbg_test_grp11"; + function = "cvbs_dbg_test_m11"; + }; + }; + + cvbs_dbg_test_pmx12: cvbs_dbg_test@12 { + cvbs_dbg_test_12 { + groups = "cvbs_dbg_test_grp12"; + function = "cvbs_dbg_test_m12"; + }; + }; + + cvbs_dbg_test_pmx13: cvbs_dbg_test@13 { + cvbs_dbg_test_13 { + groups = "cvbs_dbg_test_grp13"; + function = "cvbs_dbg_test_m13"; + }; + }; + + cvbs_dbg_test_pmx14: cvbs_dbg_test@14 { + cvbs_dbg_test_14 { + groups = "cvbs_dbg_test_grp14"; + function = "cvbs_dbg_test_m14"; + }; + }; + + cvbs_dbg_test_pmx15: cvbs_dbg_test@15 { + cvbs_dbg_test_15 { + groups = "cvbs_dbg_test_grp15"; + function = "cvbs_dbg_test_m15"; + }; + }; + + gn_gnss_power_pmx: gn_gnss_power@0 { + gn_gnss_power { + groups = "gn_gnss_power_grp"; + function = "gn_gnss_power"; + }; + }; + + gn_gnss_sw_status_pmx: gn_gnss_sw_status@0 { + gn_gnss_sw_status { + groups = "gn_gnss_sw_status_grp"; + function = "gn_gnss_sw_status"; + }; + }; + + gn_gnss_eclk_pmx: gn_gnss_eclk@0 { + gn_gnss_eclk { + groups = "gn_gnss_eclk_grp"; + function = "gn_gnss_eclk"; + }; + }; + + gn_gnss_irq1_pmx0: gn_gnss_irq1@0 { + gn_gnss_irq1_0 { + groups = "gn_gnss_irq1_grp0"; + function = "gn_gnss_irq1_m0"; + }; + }; + + gn_gnss_irq2_pmx0: gn_gnss_irq2@0 { + gn_gnss_irq2_0 { + groups = "gn_gnss_irq2_grp0"; + function = "gn_gnss_irq2_m0"; + }; + }; + + gn_gnss_tm_pmx: gn_gnss_tm@0 { + gn_gnss_tm { + groups = "gn_gnss_tm_grp"; + function = "gn_gnss_tm"; + }; + }; + + gn_gnss_tsync_pmx: gn_gnss_tsync@0 { + gn_gnss_tsync { + groups = "gn_gnss_tsync_grp"; + function = "gn_gnss_tsync"; + }; + }; + + gn_io_gnsssys_sw_cfg_pmx: gn_io_gnsssys_sw_cfg@0 { + gn_io_gnsssys_sw_cfg { + groups = "gn_io_gnsssys_sw_cfg_grp"; + function = "gn_io_gnsssys_sw_cfg"; + }; + }; + + gn_trg_pmx0: gn_trg@0 { + gn_trg_0 { + groups = "gn_trg_grp0"; + function = "gn_trg_m0"; + }; + }; + + gn_trg_pmx1: gn_trg@1 { + gn_trg_1 { + groups = "gn_trg_grp1"; + function = "gn_trg_m1"; + }; + }; + + gn_trg_shutdown_pmx0: gn_trg_shutdown@0 { + gn_trg_shutdown_0 { + groups = "gn_trg_shutdown_grp0"; + function = "gn_trg_shutdown_m0"; + }; + }; + + gn_trg_shutdown_pmx1: gn_trg_shutdown@1 { + gn_trg_shutdown_1 { + groups = "gn_trg_shutdown_grp1"; + function = "gn_trg_shutdown_m1"; + }; + }; + + gn_trg_shutdown_pmx2: gn_trg_shutdown@2 { + gn_trg_shutdown_2 { + groups = "gn_trg_shutdown_grp2"; + function = "gn_trg_shutdown_m2"; + }; + }; + + gn_trg_shutdown_pmx3: gn_trg_shutdown@3 { + gn_trg_shutdown_3 { + groups = "gn_trg_shutdown_grp3"; + function = "gn_trg_shutdown_m3"; + }; + }; + + i2c0_pmx: i2c0@0 { + i2c0 { + groups = "i2c0_grp"; + function = "i2c0"; + }; + }; + + i2c1_pmx: i2c1@0 { + i2c1 { + groups = "i2c1_grp"; + function = "i2c1"; + }; + }; + + jtag_pmx0: jtag@0 { + jtag_0 { + groups = "jtag_grp0"; + function = "jtag_m0"; + }; + }; + + ks_kas_spi_pmx0: ks_kas_spi@0 { + ks_kas_spi_0 { + groups = "ks_kas_spi_grp0"; + function = "ks_kas_spi_m0"; + }; + }; + + ld_ldd_pmx: ld_ldd@0 { + ld_ldd { + groups = "ld_ldd_grp"; + function = "ld_ldd"; + }; + }; + + ld_ldd_16bit_pmx: ld_ldd_16bit@0 { + ld_ldd_16bit { + groups = "ld_ldd_16bit_grp"; + function = "ld_ldd_16bit"; + }; + }; + + ld_ldd_fck_pmx: ld_ldd_fck@0 { + ld_ldd_fck { + groups = "ld_ldd_fck_grp"; + function = "ld_ldd_fck"; + }; + }; + + ld_ldd_lck_pmx: ld_ldd_lck@0 { + ld_ldd_lck { + groups = "ld_ldd_lck_grp"; + function = "ld_ldd_lck"; + }; + }; + + lr_lcdrom_pmx: lr_lcdrom@0 { + lr_lcdrom { + groups = "lr_lcdrom_grp"; + function = "lr_lcdrom"; + }; + }; + + lvds_analog_pmx: lvds_analog@0 { + lvds_analog { + groups = "lvds_analog_grp"; + function = "lvds_analog"; + }; + }; + + nd_df_pmx: nd_df@0 { + nd_df { + groups = "nd_df_grp"; + function = "nd_df"; + }; + }; + + nd_df_nowp_pmx: nd_df_nowp@0 { + nd_df_nowp { + groups = "nd_df_nowp_grp"; + function = "nd_df_nowp"; + }; + }; + + ps_pmx: ps@0 { + ps { + groups = "ps_grp"; + function = "ps"; + }; + }; + + pwc_core_on_pmx: pwc_core_on@0 { + pwc_core_on { + groups = "pwc_core_on_grp"; + function = "pwc_core_on"; + }; + }; + + pwc_ext_on_pmx: pwc_ext_on@0 { + pwc_ext_on { + groups = "pwc_ext_on_grp"; + function = "pwc_ext_on"; + }; + }; + + pwc_gpio3_clk_pmx: pwc_gpio3_clk@0 { + pwc_gpio3_clk { + groups = "pwc_gpio3_clk_grp"; + function = "pwc_gpio3_clk"; + }; + }; + + pwc_io_on_pmx: pwc_io_on@0 { + pwc_io_on { + groups = "pwc_io_on_grp"; + function = "pwc_io_on"; + }; + }; + + pwc_lowbatt_b_pmx0: pwc_lowbatt_b@0 { + pwc_lowbatt_b_0 { + groups = "pwc_lowbatt_b_grp0"; + function = "pwc_lowbatt_b_m0"; + }; + }; + + pwc_mem_on_pmx: pwc_mem_on@0 { + pwc_mem_on { + groups = "pwc_mem_on_grp"; + function = "pwc_mem_on"; + }; + }; + + pwc_on_key_b_pmx0: pwc_on_key_b@0 { + pwc_on_key_b_0 { + groups = "pwc_on_key_b_grp0"; + function = "pwc_on_key_b_m0"; + }; + }; + + pwc_wakeup_src0_pmx: pwc_wakeup_src0@0 { + pwc_wakeup_src0 { + groups = "pwc_wakeup_src0_grp"; + function = "pwc_wakeup_src0"; + }; + }; + + pwc_wakeup_src1_pmx: pwc_wakeup_src1@0 { + pwc_wakeup_src1 { + groups = "pwc_wakeup_src1_grp"; + function = "pwc_wakeup_src1"; + }; + }; + + pwc_wakeup_src2_pmx: pwc_wakeup_src2@0 { + pwc_wakeup_src2 { + groups = "pwc_wakeup_src2_grp"; + function = "pwc_wakeup_src2"; + }; + }; + + pwc_wakeup_src3_pmx: pwc_wakeup_src3@0 { + pwc_wakeup_src3 { + groups = "pwc_wakeup_src3_grp"; + function = "pwc_wakeup_src3"; + }; + }; + + pw_cko0_pmx0: pw_cko0@0 { + pw_cko0_0 { + groups = "pw_cko0_grp0"; + function = "pw_cko0_m0"; + }; + }; + + pw_cko0_pmx1: pw_cko0@1 { + pw_cko0_1 { + groups = "pw_cko0_grp1"; + function = "pw_cko0_m1"; + }; + }; + + pw_cko0_pmx2: pw_cko0@2 { + pw_cko0_2 { + groups = "pw_cko0_grp2"; + function = "pw_cko0_m2"; + }; + }; + + pw_cko1_pmx0: pw_cko1@0 { + pw_cko1_0 { + groups = "pw_cko1_grp0"; + function = "pw_cko1_m0"; + }; + }; + + pw_cko1_pmx1: pw_cko1@1 { + pw_cko1_1 { + groups = "pw_cko1_grp1"; + function = "pw_cko1_m1"; + }; + }; + + pw_i2s01_clk_pmx0: pw_i2s01_clk@0 { + pw_i2s01_clk_0 { + groups = "pw_i2s01_clk_grp0"; + function = "pw_i2s01_clk_m0"; + }; + }; + + pw_i2s01_clk_pmx1: pw_i2s01_clk@1 { + pw_i2s01_clk_1 { + groups = "pw_i2s01_clk_grp1"; + function = "pw_i2s01_clk_m1"; + }; + }; + + pw_pwm0_pmx: pw_pwm0@0 { + pw_pwm0 { + groups = "pw_pwm0_grp"; + function = "pw_pwm0"; + }; + }; + + pw_pwm1_pmx: pw_pwm1@0 { + pw_pwm1 { + groups = "pw_pwm1_grp"; + function = "pw_pwm1"; + }; + }; + + pw_pwm2_pmx0: pw_pwm2@0 { + pw_pwm2_0 { + groups = "pw_pwm2_grp0"; + function = "pw_pwm2_m0"; + }; + }; + + pw_pwm2_pmx1: pw_pwm2@1 { + pw_pwm2_1 { + groups = "pw_pwm2_grp1"; + function = "pw_pwm2_m1"; + }; + }; + + pw_pwm3_pmx0: pw_pwm3@0 { + pw_pwm3_0 { + groups = "pw_pwm3_grp0"; + function = "pw_pwm3_m0"; + }; + }; + + pw_pwm3_pmx1: pw_pwm3@1 { + pw_pwm3_1 { + groups = "pw_pwm3_grp1"; + function = "pw_pwm3_m1"; + }; + }; + + pw_pwm_cpu_vol_pmx0: pw_pwm_cpu_vol@0 { + pw_pwm_cpu_vol_0 { + groups = "pw_pwm_cpu_vol_grp0"; + function = "pw_pwm_cpu_vol_m0"; + }; + }; + + pw_pwm_cpu_vol_pmx1: pw_pwm_cpu_vol@1 { + pw_pwm_cpu_vol_1 { + groups = "pw_pwm_cpu_vol_grp1"; + function = "pw_pwm_cpu_vol_m1"; + }; + }; + + pw_backlight_pmx0: pw_backlight@0 { + pw_backlight_0 { + groups = "pw_backlight_grp0"; + function = "pw_backlight_m0"; + }; + }; + + pw_backlight_pmx1: pw_backlight@1 { + pw_backlight_1 { + groups = "pw_backlight_grp1"; + function = "pw_backlight_m1"; + }; + }; + + rg_eth_mac_pmx: rg_eth_mac@0 { + rg_eth_mac { + groups = "rg_eth_mac_grp"; + function = "rg_eth_mac"; + }; + }; + + rg_gmac_phy_intr_n_pmx: rg_gmac_phy_intr_n@0 { + rg_gmac_phy_intr_n { + groups = "rg_gmac_phy_intr_n_grp"; + function = "rg_gmac_phy_intr_n"; + }; + }; + + rg_rgmii_mac_pmx: rg_rgmii_mac@0 { + rg_rgmii_mac { + groups = "rg_rgmii_mac_grp"; + function = "rg_rgmii_mac"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx0: rg_rgmii_phy_ref_clk@0 { + rg_rgmii_phy_ref_clk_0 { + groups = + "rg_rgmii_phy_ref_clk_grp0"; + function = + "rg_rgmii_phy_ref_clk_m0"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx1: rg_rgmii_phy_ref_clk@1 { + rg_rgmii_phy_ref_clk_1 { + groups = + "rg_rgmii_phy_ref_clk_grp1"; + function = + "rg_rgmii_phy_ref_clk_m1"; + }; + }; + + sd0_pmx: sd0@0 { + sd0 { + groups = "sd0_grp"; + function = "sd0"; + }; + }; + + sd0_4bit_pmx: sd0_4bit@0 { + sd0_4bit { + groups = "sd0_4bit_grp"; + function = "sd0_4bit"; + }; + }; + + sd1_pmx: sd1@0 { + sd1 { + groups = "sd1_grp"; + function = "sd1"; + }; + }; + + sd1_4bit_pmx0: sd1_4bit@0 { + sd1_4bit_0 { + groups = "sd1_4bit_grp0"; + function = "sd1_4bit_m0"; + }; + }; + + sd1_4bit_pmx1: sd1_4bit@1 { + sd1_4bit_1 { + groups = "sd1_4bit_grp1"; + function = "sd1_4bit_m1"; + }; + }; + + sd2_pmx0: sd2@0 { + sd2_0 { + groups = "sd2_grp0"; + function = "sd2_m0"; + }; + }; + + sd2_no_cdb_pmx0: sd2_no_cdb@0 { + sd2_no_cdb_0 { + groups = "sd2_no_cdb_grp0"; + function = "sd2_no_cdb_m0"; + }; + }; + + sd3_pmx: sd3@0 { + sd3 { + groups = "sd3_grp"; + function = "sd3"; + }; + }; + + sd5_pmx: sd5@0 { + sd5 { + groups = "sd5_grp"; + function = "sd5"; + }; + }; + + sd6_pmx0: sd6@0 { + sd6_0 { + groups = "sd6_grp0"; + function = "sd6_m0"; + }; + }; + + sd6_pmx1: sd6@1 { + sd6_1 { + groups = "sd6_grp1"; + function = "sd6_m1"; + }; + }; + + sp0_ext_ldo_on_pmx: sp0_ext_ldo_on@0 { + sp0_ext_ldo_on { + groups = "sp0_ext_ldo_on_grp"; + function = "sp0_ext_ldo_on"; + }; + }; + + sp0_qspi_pmx: sp0_qspi@0 { + sp0_qspi { + groups = "sp0_qspi_grp"; + function = "sp0_qspi"; + }; + }; + + sp1_spi_pmx: sp1_spi@0 { + sp1_spi { + groups = "sp1_spi_grp"; + function = "sp1_spi"; + }; + }; + + tpiu_trace_pmx: tpiu_trace@0 { + tpiu_trace { + groups = "tpiu_trace_grp"; + function = "tpiu_trace"; + }; + }; + + uart0_pmx: uart0@0 { + uart0 { + groups = "uart0_grp"; + function = "uart0"; + }; + }; + + uart0_nopause_pmx: uart0_nopause@0 { + uart0_nopause { + groups = "uart0_nopause_grp"; + function = "uart0_nopause"; + }; + }; + + uart1_pmx: uart1@0 { + uart1 { + groups = "uart1_grp"; + function = "uart1"; + }; + }; + + uart2_pmx: uart2@0 { + uart2 { + groups = "uart2_grp"; + function = "uart2"; + }; + }; + + uart3_pmx0: uart3@0 { + uart3_0 { + groups = "uart3_grp0"; + function = "uart3_m0"; + }; + }; + + uart3_pmx1: uart3@1 { + uart3_1 { + groups = "uart3_grp1"; + function = "uart3_m1"; + }; + }; + + uart3_pmx2: uart3@2 { + uart3_2 { + groups = "uart3_grp2"; + function = "uart3_m2"; + }; + }; + + uart3_pmx3: uart3@3 { + uart3_3 { + groups = "uart3_grp3"; + function = "uart3_m3"; + }; + }; + + uart3_nopause_pmx0: uart3_nopause@0 { + uart3_nopause_0 { + groups = "uart3_nopause_grp0"; + function = "uart3_nopause_m0"; + }; + }; + + uart3_nopause_pmx1: uart3_nopause@1 { + uart3_nopause_1 { + groups = "uart3_nopause_grp1"; + function = "uart3_nopause_m1"; + }; + }; + + uart4_pmx0: uart4@0 { + uart4_0 { + groups = "uart4_grp0"; + function = "uart4_m0"; + }; + }; + + uart4_pmx1: uart4@1 { + uart4_1 { + groups = "uart4_grp1"; + function = "uart4_m1"; + }; + }; + + uart4_pmx2: uart4@2 { + uart4_2 { + groups = "uart4_grp2"; + function = "uart4_m2"; + }; + }; + + uart4_nopause_pmx: uart4_nopause@0 { + uart4_nopause { + groups = "uart4_nopause_grp"; + function = "uart4_nopause"; + }; + }; + + usb0_drvvbus_pmx: usb0_drvvbus@0 { + usb0_drvvbus { + groups = "usb0_drvvbus_grp"; + function = "usb0_drvvbus"; + }; + }; + + usb1_drvvbus_pmx: usb1_drvvbus@0 { + usb1_drvvbus { + groups = "usb1_drvvbus_grp"; + function = "usb1_drvvbus"; + }; + }; + + visbus_dout_pmx: visbus_dout@0 { + visbus_dout { + groups = "visbus_dout_grp"; + function = "visbus_dout"; + }; + }; + + vi_vip1_pmx: vi_vip1@0 { + vi_vip1 { + groups = "vi_vip1_grp"; + function = "vi_vip1"; + }; + }; + + vi_vip1_ext_pmx: vi_vip1_ext@0 { + vi_vip1_ext { + groups = "vi_vip1_ext_grp"; + function = "vi_vip1_ext"; + }; + }; + + vi_vip1_low8bit_pmx: vi_vip1_low8bit@0 { + vi_vip1_low8bit { + groups = "vi_vip1_low8bit_grp"; + function = "vi_vip1_low8bit"; + }; + }; + + vi_vip1_high8bit_pmx: vi_vip1_high8bit@0 { + vi_vip1_high8bit { + groups = "vi_vip1_high8bit_grp"; + function = "vi_vip1_high8bit"; + }; + }; }; pmipc { @@ -356,6 +1375,12 @@ clock-names = "gpio0_io"; gpio-controller; interrupt-controller; + + gpio-banks = <2>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>; + gpio-ranges-group-names = "lvds_gpio_grp", + "uart_nand_gpio_grp"; }; nand@17050000 { @@ -461,11 +1486,22 @@ #interrupt-cells = <2>; compatible = "sirf,atlas7-gpio"; reg = <0x13300000 0x1000>; - interrupts = <0 43 0>, <0 44 0>, <0 45 0>; + interrupts = <0 43 0>, <0 44 0>, + <0 45 0>, <0 46 0>; clocks = <&car 84>; clock-names = "gpio1_io"; gpio-controller; interrupt-controller; + + gpio-banks = <4>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>, + <&pinctrl 64 0 0>, + <&pinctrl 96 0 0>; + gpio-ranges-group-names = "gnss_gpio_grp", + "lcd_vip_gpio_grp", + "sdio_i2s_gpio_grp", + "sp_rgmii_gpio_grp"; }; sd2: sdhci@14200000 { @@ -744,6 +1780,10 @@ interrupts = <0 47 0>; gpio-controller; interrupt-controller; + + gpio-banks = <1>; + gpio-ranges = <&pinctrl 0 0 0>; + gpio-ranges-group-names = "rtc_gpio_grp"; }; rtc-iobg@18840000 { diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 5ab7548e04e1..9e2444b07bce 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1321,7 +1321,7 @@ usb0: gadget@00500000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00500000 0x100000 0xf8030000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 653a1f851f2b..3ee22ee13c5a 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -127,7 +127,7 @@ usb0: gadget@00400000 { #address-cells = <1>; #size-cells = <0>; - compatible = "atmel,at91sam9rl-udc"; + compatible = "atmel,sama5d3-udc"; reg = <0x00400000 0x100000 0xfc02c000 0x4000>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH 2>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 107395c32d82..17f63f7dfd9e 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -150,6 +150,16 @@ interface-type = "ace"; reg = <0x5000 0x1000>; }; + + pmu@9000 { + compatible = "arm,cci-400-pmu,r0"; + reg = <0x9000 0x5000>; + interrupts = <0 105 4>, + <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>; + }; }; memory-controller@7ffd0000 { @@ -187,11 +197,22 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; - interrupt-affinity = <&cpu0>, <&cpu1>; + interrupt-affinity = <&cpu0>, + <&cpu1>; + }; + + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + interrupt-affinity = <&cpu2>, + <&cpu3>, + <&cpu4>; }; oscclk6a: oscclk6a { diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fd6a6d23bc20..5fd8df6f50ea 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -169,6 +169,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_NAND_BRCMNAND=y CONFIG_MTD_NAND_DAVINCI=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y @@ -352,7 +353,6 @@ CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_KEYSTONE=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_POWER_RESET_RMOBILE=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 8ecba00dcd83..7ebc346bf9fa 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -2,6 +2,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_PERF_EVENTS=y +CONFIG_MODULES=y CONFIG_ARCH_SUNXI=y CONFIG_SMP=y CONFIG_NR_CPUS=8 @@ -77,7 +78,6 @@ CONFIG_SPI_SUN6I=y CONFIG_GPIO_SYSFS=y CONFIG_POWER_SUPPLY=y CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_WATCHDOG=y @@ -87,6 +87,10 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_GPIO=y +CONFIG_FB=y +CONFIG_FB_SIMPLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1c3938f26beb..485982084fe9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -140,16 +140,11 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) * The _caller variety takes a __builtin_return_address(0) value for * /proc/vmalloc to use - and should only be used in non-inline functions. */ -extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, - size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); - extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); -extern void __arm_iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); @@ -321,21 +316,24 @@ extern void _memset_io(volatile void __iomem *, int, size_t); static inline void memset_io(volatile void __iomem *dst, unsigned c, size_t count) { - memset((void __force *)dst, c, count); + extern void mmioset(void *, unsigned int, size_t); + mmioset((void __force *)dst, c, count); } #define memset_io(dst,c,count) memset_io(dst,c,count) static inline void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - memcpy(to, (const void __force *)from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy(to, (const void __force *)from, count); } #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) static inline void memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - memcpy((void __force *)to, from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy((void __force *)to, from, count); } #define memcpy_toio(to,from,count) memcpy_toio(to,from,count) @@ -348,18 +346,61 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #endif /* readl */ /* - * ioremap and friends. + * ioremap() and friends. + * + * ioremap() takes a resource address, and size. Due to the ARM memory + * types, it is important to use the correct ioremap() function as each + * mapping has specific properties. + * + * Function Memory type Cacheability Cache hint + * ioremap() Device n/a n/a + * ioremap_nocache() Device n/a n/a + * ioremap_cache() Normal Writeback Read allocate + * ioremap_wc() Normal Non-cacheable n/a + * ioremap_wt() Normal Non-cacheable n/a + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" + * - writes may be delayed before they hit the endpoint device * - * ioremap takes a PCI memory address, as specified in - * Documentation/io-mapping.txt. + * ioremap_nocache() is the same as ioremap() as there are too many device + * drivers using this for device registers, and documentation which tells + * people to use it for such for this to be any different. This is not a + * safe fallback for memory-like mappings, or memory regions where the + * compiler may generate unaligned accesses - eg, via inlining its own + * memcpy. * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * - ordering is not guaranteed without explicit dependencies or barrier + * instructions + * - writes may be delayed before they hit the endpoint memory + * + * The cache hint is only a performance hint: CPUs may alias these hints. + * Eg, a CPU not implementing read allocate but implementing write allocate + * will provide a write allocate mapping instead. */ -#define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define iounmap __arm_iounmap +void __iomem *ioremap(resource_size_t res_cookie, size_t size); +#define ioremap ioremap +#define ioremap_nocache ioremap + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size); +#define ioremap_cache ioremap_cache + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); +#define ioremap_wc ioremap_wc +#define ioremap_wt ioremap_wc + +void iounmap(volatile void __iomem *iomem_cookie); +#define iounmap iounmap /* * io{read,write}{16,32}be() macros diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 3a72d69b3255..6f225acc07c5 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -275,7 +275,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index bfd662e49a25..aeddd28b3595 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -129,7 +129,36 @@ /* * These are the memory types, defined to be compatible with - * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB + * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B + * ARMv6+ without TEX remapping, they are a table index. + * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B + * + * MT type Pre-ARMv6 ARMv6+ type / cacheable status + * UNCACHED Uncached Strongly ordered + * BUFFERABLE Bufferable Normal memory / non-cacheable + * WRITETHROUGH Writethrough Normal memory / write through + * WRITEBACK Writeback Normal memory / write back, read alloc + * MINICACHE Minicache N/A + * WRITEALLOC Writeback Normal memory / write back, write alloc + * DEV_SHARED Uncached Device memory (shared) + * DEV_NONSHARED Uncached Device memory (non-shared) + * DEV_WC Bufferable Normal memory / non-cacheable + * DEV_CACHED Writeback Normal memory / write back, read alloc + * VECTORS Variable Normal memory / variable + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" */ #define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ #define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7dac3086e361..cb4fb1e69778 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -410,7 +410,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3d6b7821cff8 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -578,7 +578,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -637,7 +637,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7797e81e40e0..64111bd4440b 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -61,8 +61,10 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(mmiocpy) ENTRY(memcpy) #include "copy_template.S" ENDPROC(memcpy) +ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index a4ee97b5a2bf..3c65e3bd790f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -16,6 +16,7 @@ .text .align 5 +ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? @@ -133,3 +134,4 @@ UNWIND( .fnstart ) b 1b UNWIND( .fnend ) ENDPROC(memset) +ENDPROC(mmioset) diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index e9184feffc4e..0ac9e4b3b265 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -19,7 +19,6 @@ config ARCH_BCM_IPROC select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select PINCTRL - select MTD_NAND_BRCMNAND help This enables support for systems based on Broadcom IPROC architected SoCs. The IPROC complex contains one or more ARM CPUs along with common diff --git a/arch/arm/mach-dove/include/mach/irqs.h b/arch/arm/mach-dove/include/mach/irqs.h index 03d401d20453..3f29e6bca058 100644 --- a/arch/arm/mach-dove/include/mach/irqs.h +++ b/arch/arm/mach-dove/include/mach/irqs.h @@ -14,73 +14,73 @@ /* * Dove Low Interrupt Controller */ -#define IRQ_DOVE_BRIDGE 0 -#define IRQ_DOVE_H2C 1 -#define IRQ_DOVE_C2H 2 -#define IRQ_DOVE_NAND 3 -#define IRQ_DOVE_PDMA 4 -#define IRQ_DOVE_SPI1 5 -#define IRQ_DOVE_SPI0 6 -#define IRQ_DOVE_UART_0 7 -#define IRQ_DOVE_UART_1 8 -#define IRQ_DOVE_UART_2 9 -#define IRQ_DOVE_UART_3 10 -#define IRQ_DOVE_I2C 11 -#define IRQ_DOVE_GPIO_0_7 12 -#define IRQ_DOVE_GPIO_8_15 13 -#define IRQ_DOVE_GPIO_16_23 14 -#define IRQ_DOVE_PCIE0_ERR 15 -#define IRQ_DOVE_PCIE0 16 -#define IRQ_DOVE_PCIE1_ERR 17 -#define IRQ_DOVE_PCIE1 18 -#define IRQ_DOVE_I2S0 19 -#define IRQ_DOVE_I2S0_ERR 20 -#define IRQ_DOVE_I2S1 21 -#define IRQ_DOVE_I2S1_ERR 22 -#define IRQ_DOVE_USB_ERR 23 -#define IRQ_DOVE_USB0 24 -#define IRQ_DOVE_USB1 25 -#define IRQ_DOVE_GE00_RX 26 -#define IRQ_DOVE_GE00_TX 27 -#define IRQ_DOVE_GE00_MISC 28 -#define IRQ_DOVE_GE00_SUM 29 -#define IRQ_DOVE_GE00_ERR 30 -#define IRQ_DOVE_CRYPTO 31 +#define IRQ_DOVE_BRIDGE (1 + 0) +#define IRQ_DOVE_H2C (1 + 1) +#define IRQ_DOVE_C2H (1 + 2) +#define IRQ_DOVE_NAND (1 + 3) +#define IRQ_DOVE_PDMA (1 + 4) +#define IRQ_DOVE_SPI1 (1 + 5) +#define IRQ_DOVE_SPI0 (1 + 6) +#define IRQ_DOVE_UART_0 (1 + 7) +#define IRQ_DOVE_UART_1 (1 + 8) +#define IRQ_DOVE_UART_2 (1 + 9) +#define IRQ_DOVE_UART_3 (1 + 10) +#define IRQ_DOVE_I2C (1 + 11) +#define IRQ_DOVE_GPIO_0_7 (1 + 12) +#define IRQ_DOVE_GPIO_8_15 (1 + 13) +#define IRQ_DOVE_GPIO_16_23 (1 + 14) +#define IRQ_DOVE_PCIE0_ERR (1 + 15) +#define IRQ_DOVE_PCIE0 (1 + 16) +#define IRQ_DOVE_PCIE1_ERR (1 + 17) +#define IRQ_DOVE_PCIE1 (1 + 18) +#define IRQ_DOVE_I2S0 (1 + 19) +#define IRQ_DOVE_I2S0_ERR (1 + 20) +#define IRQ_DOVE_I2S1 (1 + 21) +#define IRQ_DOVE_I2S1_ERR (1 + 22) +#define IRQ_DOVE_USB_ERR (1 + 23) +#define IRQ_DOVE_USB0 (1 + 24) +#define IRQ_DOVE_USB1 (1 + 25) +#define IRQ_DOVE_GE00_RX (1 + 26) +#define IRQ_DOVE_GE00_TX (1 + 27) +#define IRQ_DOVE_GE00_MISC (1 + 28) +#define IRQ_DOVE_GE00_SUM (1 + 29) +#define IRQ_DOVE_GE00_ERR (1 + 30) +#define IRQ_DOVE_CRYPTO (1 + 31) /* * Dove High Interrupt Controller */ -#define IRQ_DOVE_AC97 32 -#define IRQ_DOVE_PMU 33 -#define IRQ_DOVE_CAM 34 -#define IRQ_DOVE_SDIO0 35 -#define IRQ_DOVE_SDIO1 36 -#define IRQ_DOVE_SDIO0_WAKEUP 37 -#define IRQ_DOVE_SDIO1_WAKEUP 38 -#define IRQ_DOVE_XOR_00 39 -#define IRQ_DOVE_XOR_01 40 -#define IRQ_DOVE_XOR0_ERR 41 -#define IRQ_DOVE_XOR_10 42 -#define IRQ_DOVE_XOR_11 43 -#define IRQ_DOVE_XOR1_ERR 44 -#define IRQ_DOVE_LCD_DCON 45 -#define IRQ_DOVE_LCD1 46 -#define IRQ_DOVE_LCD0 47 -#define IRQ_DOVE_GPU 48 -#define IRQ_DOVE_PERFORM_MNTR 49 -#define IRQ_DOVE_VPRO_DMA1 51 -#define IRQ_DOVE_SSP_TIMER 54 -#define IRQ_DOVE_SSP 55 -#define IRQ_DOVE_MC_L2_ERR 56 -#define IRQ_DOVE_CRYPTO_ERR 59 -#define IRQ_DOVE_GPIO_24_31 60 -#define IRQ_DOVE_HIGH_GPIO 61 -#define IRQ_DOVE_SATA 62 +#define IRQ_DOVE_AC97 (1 + 32) +#define IRQ_DOVE_PMU (1 + 33) +#define IRQ_DOVE_CAM (1 + 34) +#define IRQ_DOVE_SDIO0 (1 + 35) +#define IRQ_DOVE_SDIO1 (1 + 36) +#define IRQ_DOVE_SDIO0_WAKEUP (1 + 37) +#define IRQ_DOVE_SDIO1_WAKEUP (1 + 38) +#define IRQ_DOVE_XOR_00 (1 + 39) +#define IRQ_DOVE_XOR_01 (1 + 40) +#define IRQ_DOVE_XOR0_ERR (1 + 41) +#define IRQ_DOVE_XOR_10 (1 + 42) +#define IRQ_DOVE_XOR_11 (1 + 43) +#define IRQ_DOVE_XOR1_ERR (1 + 44) +#define IRQ_DOVE_LCD_DCON (1 + 45) +#define IRQ_DOVE_LCD1 (1 + 46) +#define IRQ_DOVE_LCD0 (1 + 47) +#define IRQ_DOVE_GPU (1 + 48) +#define IRQ_DOVE_PERFORM_MNTR (1 + 49) +#define IRQ_DOVE_VPRO_DMA1 (1 + 51) +#define IRQ_DOVE_SSP_TIMER (1 + 54) +#define IRQ_DOVE_SSP (1 + 55) +#define IRQ_DOVE_MC_L2_ERR (1 + 56) +#define IRQ_DOVE_CRYPTO_ERR (1 + 59) +#define IRQ_DOVE_GPIO_24_31 (1 + 60) +#define IRQ_DOVE_HIGH_GPIO (1 + 61) +#define IRQ_DOVE_SATA (1 + 62) /* * DOVE General Purpose Pins */ -#define IRQ_DOVE_GPIO_START 64 +#define IRQ_DOVE_GPIO_START 65 #define NR_GPIO_IRQS 64 /* diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 4a5a7aedcb76..df0223f76fa9 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -126,14 +126,14 @@ __exception_irq_entry dove_legacy_handle_irq(struct pt_regs *regs) stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_LOW_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_LOW_OFF); if (stat) { - unsigned int hwirq = __fls(stat); + unsigned int hwirq = 1 + __fls(stat); handle_IRQ(hwirq, regs); return; } stat = readl_relaxed(dove_irq_base + IRQ_CAUSE_HIGH_OFF); stat &= readl_relaxed(dove_irq_base + IRQ_MASK_HIGH_OFF); if (stat) { - unsigned int hwirq = 32 + __fls(stat); + unsigned int hwirq = 33 + __fls(stat); handle_IRQ(hwirq, regs); return; } @@ -144,8 +144,8 @@ void __init dove_init_irq(void) { int i; - orion_irq_init(0, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); - orion_irq_init(32, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); + orion_irq_init(1, IRQ_VIRT_BASE + IRQ_MASK_LOW_OFF); + orion_irq_init(33, IRQ_VIRT_BASE + IRQ_MASK_HIGH_OFF); #ifdef CONFIG_MULTI_IRQ_HANDLER set_handle_irq(dove_legacy_handle_irq); diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c index 6dfd4ab97b2a..301ab38d38ba 100644 --- a/arch/arm/mach-mvebu/pm-board.c +++ b/arch/arm/mach-mvebu/pm-board.c @@ -43,6 +43,9 @@ static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd) for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) ackcmd |= BIT(pic_raw_gpios[i]); + srcmd = cpu_to_le32(srcmd); + ackcmd = cpu_to_le32(ackcmd); + /* * Wait a while, the PIC needs quite a bit of time between the * two GPIO commands. diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index f1a68c63dc99..903c85be2897 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -274,8 +274,5 @@ obj-y += $(nand-m) $(nand-y) smsc911x-$(CONFIG_SMSC911X) := gpmc-smsc911x.o obj-y += $(smsc911x-m) $(smsc911x-y) -ifneq ($(CONFIG_HWSPINLOCK_OMAP),) -obj-y += hwspinlock.o -endif obj-y += common-board-devices.o twl-common.o dss-common.o diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index e1a56d87599e..1ed4be184a29 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -117,7 +117,6 @@ static void omap2_show_dma_caps(void) u8 revision = dma_read(REVISION, 0) & 0xff; printk(KERN_INFO "OMAP DMA hardware revision %d.%d\n", revision >> 4, revision & 0xf); - return; } static unsigned configure_dma_errata(void) diff --git a/arch/arm/mach-omap2/hwspinlock.c b/arch/arm/mach-omap2/hwspinlock.c deleted file mode 100644 index ef175acaeaa2..000000000000 --- a/arch/arm/mach-omap2/hwspinlock.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * OMAP hardware spinlock device initialization - * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com - * - * Contact: Simon Que <sque@ti.com> - * Hari Kanigeri <h-kanigeri2@ti.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/err.h> -#include <linux/hwspinlock.h> - -#include "soc.h" -#include "omap_hwmod.h" -#include "omap_device.h" - -static struct hwspinlock_pdata omap_hwspinlock_pdata __initdata = { - .base_id = 0, -}; - -static int __init hwspinlocks_init(void) -{ - int retval = 0; - struct omap_hwmod *oh; - struct platform_device *pdev; - const char *oh_name = "spinlock"; - const char *dev_name = "omap_hwspinlock"; - - /* - * Hwmod lookup will fail in case our platform doesn't support the - * hardware spinlock module, so it is safe to run this initcall - * on all omaps - */ - oh = omap_hwmod_lookup(oh_name); - if (oh == NULL) - return -EINVAL; - - pdev = omap_device_build(dev_name, 0, oh, &omap_hwspinlock_pdata, - sizeof(struct hwspinlock_pdata)); - if (IS_ERR(pdev)) { - pr_err("Can't build omap_device for %s:%s\n", dev_name, - oh_name); - retval = PTR_ERR(pdev); - } - - return retval; -} -/* early board code might need to reserve specific hwspinlock instances */ -omap_postcore_initcall(hwspinlocks_init); diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index e03d8b5c9ad0..9ab8932403e5 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -4,6 +4,7 @@ menuconfig ARCH_SIRF select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP select NO_IOPORT_MAP + select REGMAP select PINCTRL select PINCTRL_SIRF help diff --git a/arch/arm/mach-prima2/rtciobrg.c b/arch/arm/mach-prima2/rtciobrg.c index 8f66d8f7ca75..d4852d24dc7d 100644 --- a/arch/arm/mach-prima2/rtciobrg.c +++ b/arch/arm/mach-prima2/rtciobrg.c @@ -1,5 +1,5 @@ /* - * RTC I/O Bridge interfaces for CSR SiRFprimaII + * RTC I/O Bridge interfaces for CSR SiRFprimaII/atlas7 * ARM access the registers of SYSRTC, GPSRTC and PWRC through this module * * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/io.h> +#include <linux/regmap.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> @@ -66,6 +67,7 @@ u32 sirfsoc_rtc_iobrg_readl(u32 addr) { unsigned long flags, val; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); val = __sirfsoc_rtc_iobrg_readl(addr); @@ -90,6 +92,7 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) { unsigned long flags; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); sirfsoc_rtc_iobrg_pre_writel(val, addr); @@ -102,6 +105,45 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) } EXPORT_SYMBOL_GPL(sirfsoc_rtc_iobrg_writel); + +static int regmap_iobg_regwrite(void *context, unsigned int reg, + unsigned int val) +{ + sirfsoc_rtc_iobrg_writel(val, reg); + return 0; +} + +static int regmap_iobg_regread(void *context, unsigned int reg, + unsigned int *val) +{ + *val = (u32)sirfsoc_rtc_iobrg_readl(reg); + return 0; +} + +static struct regmap_bus regmap_iobg = { + .reg_write = regmap_iobg_regwrite, + .reg_read = regmap_iobg_regread, +}; + +/** + * devm_regmap_init_iobg(): Initialise managed register map + * + * @iobg: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config) +{ + const struct regmap_bus *bus = ®map_iobg; + + return devm_regmap_init(dev, bus, dev, config); +} +EXPORT_SYMBOL_GPL(devm_regmap_init_iobg); + static const struct of_device_id rtciobrg_ids[] = { { .compatible = "sirf,prima2-rtciobg" }, {} @@ -132,7 +174,7 @@ static int __init sirfsoc_rtciobrg_init(void) } postcore_initcall(sirfsoc_rtciobrg_init); -MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>, " - "Barry Song <baohua.song@csr.com>"); +MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>"); +MODULE_AUTHOR("Barry Song <baohua.song@csr.com>"); MODULE_DESCRIPTION("CSR SiRFprimaII rtc io bridge"); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index 81502b90dd91..4efe2d43a126 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -35,7 +35,7 @@ config MACH_SUN7I select SUN5I_HSTIMER config MACH_SUN8I - bool "Allwinner A23 (sun8i) SoCs support" + bool "Allwinner sun8i Family SoCs support" default ARCH_SUNXI select ARM_GIC select MFD_SUN6I_PRCM diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 1bc811a74a9f..65bab2876343 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -67,10 +67,13 @@ MACHINE_END static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-a23", + "allwinner,sun8i-a33", + "allwinner,sun8i-h3", NULL, }; -DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i (A23) Family") +DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family") + .init_time = sun6i_timer_init, .dt_compat = sun8i_board_dt_compat, .init_late = sunxi_dt_cpufreq_init, MACHINE_END diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d1e5ad7ab3bc..0c81056c1dd7 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif -void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; @@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, - __builtin_return_address(0)); + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap_pfn); @@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; -void __iomem * -__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) { - return arch_ioremap_caller(phys_addr, size, mtype, - __builtin_return_address(0)); + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap_wc); /* * Remap an arbitrary physical address space into the kernel virtual @@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr) void (*arch_iounmap)(volatile void __iomem *) = __iounmap; -void __arm_iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *cookie) { - arch_iounmap(io_addr); + arch_iounmap(cookie); } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); #ifdef CONFIG_PCI static int pci_ioremap_mem_type = MT_DEVICE; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..870838a46d52 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1072,6 +1072,7 @@ void __init sanity_check_meminfo(void) int highmem = 0; phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; struct memblock_region *reg; + bool should_use_highmem = false; for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; @@ -1090,6 +1091,7 @@ void __init sanity_check_meminfo(void) pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", &block_start, &block_end); memblock_remove(reg->base, reg->size); + should_use_highmem = true; continue; } @@ -1100,6 +1102,7 @@ void __init sanity_check_meminfo(void) &block_start, &block_end, &vmalloc_limit); memblock_remove(vmalloc_limit, overlap_size); block_end = vmalloc_limit; + should_use_highmem = true; } } @@ -1134,6 +1137,9 @@ void __init sanity_check_meminfo(void) } } + if (should_use_highmem) + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + high_memory = __va(arm_lowmem_limit - 1) + 1; /* @@ -1494,6 +1500,7 @@ void __init paging_init(const struct machine_desc *mdesc) build_mem_type_table(); prepare_page_table(); map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index afd7e05d95f1..1dd10936d68d 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -351,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, - size_t size, unsigned int mtype, void *caller) +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - return __arm_ioremap_pfn(pfn, offset, size, mtype); + return (void __iomem *)phys_addr; } -void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, - unsigned int mtype) +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) { - return (void __iomem *)phys_addr; + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap); -void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); -void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iounmap(volatile void __iomem *addr) { - return __arm_ioremap(phys_addr, size, mtype); } +EXPORT_SYMBOL(__iounmap); void (*arch_iounmap)(volatile void __iomem *); -void __arm_iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8..aedec81d1198 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,11 @@ * it does. */ -#define _GNU_SOURCE - #include <byteswap.h> #include <elf.h> #include <errno.h> -#include <error.h> #include <fcntl.h> +#include <stdarg.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -82,11 +80,25 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) +{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } @@ -119,68 +131,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +205,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f6edb14b7e4..318175f62c24 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,9 +23,9 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select EDAC_SUPPORT select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS + select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 83578e766b94..4c55833d8a41 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -23,6 +23,16 @@ device_type = "memory"; reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ }; + + gpio-keys { + compatible = "gpio-keys"; + button@1 { + label = "POWER"; + linux,code = <116>; + linux,input-type = <0x1>; + interrupts = <0x0 0x2d 0x1>; + }; + }; }; &pcie0clk { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 0bb287ca0a98..0689c3fb56e3 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -717,6 +717,19 @@ phy-names = "sata-phy"; }; + sbgpio: sbgpio@17001000{ + compatible = "apm,xgene-gpio-sb"; + reg = <0x0 0x17001000 0x0 0x400>; + #gpio-cells = <2>; + gpio-controller; + interrupts = <0x0 0x28 0x1>, + <0x0 0x29 0x1>, + <0x0 0x2a 0x1>, + <0x0 0x2b 0x1>, + <0x0 0x2c 0x1>, + <0x0 0x2d 0x1>; + }; + rtc: rtc@10510000 { compatible = "apm,xgene-rtc"; reg = <0x0 0x10510000 0x0 0x400>; diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile index c5c98b91514e..bb3c07209676 100644 --- a/arch/arm64/boot/dts/arm/Makefile +++ b/arch/arm64/boot/dts/arm/Makefile @@ -1,6 +1,7 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2f-1xv7-ca53x2.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts new file mode 100644 index 000000000000..5b1d0181023b --- /dev/null +++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts @@ -0,0 +1,191 @@ +/* + * ARM Ltd. Versatile Express + * + * LogicTile Express 20MG + * V2F-1XV7 + * + * Cortex-A53 (2 cores) Soft Macrocell Model + * + * HBI-0247C + */ + +/dts-v1/; + +#include <dt-bindings/interrupt-controller/arm-gic.h> + +/ { + model = "V2F-1XV7 Cortex-A53x2 SMM"; + arm,hbi = <0x247>; + arm,vexpress,site = <0xf>; + compatible = "arm,vexpress,v2f-1xv7,ca53x2", "arm,vexpress,v2f-1xv7", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { + stdout-path = "serial0:38400n8"; + }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + i2c0 = &v2m_i2c_dvi; + i2c1 = &v2m_i2c_pcie; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 0>; + next-level-cache = <&L2_0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 1>; + next-level-cache = <&L2_0>; + }; + + L2_0: l2-cache0 { + compatible = "cache"; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; /* 2GB @ 2GB */ + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,gic-400"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x2000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + smbclk: osc@4 { + /* SMC clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 4>; + freq-range = <40000000 40000000>; + #clock-cells = <0>; + clock-output-names = "smclk"; + }; + + volt@0 { + /* VIO to expansion board above */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "VIO_UP"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + volt@1 { + /* 12V from power connector J6 */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 1>; + regulator-name = "12"; + regulator-always-on; + }; + + temp@0 { + /* FPGA temperature */ + compatible = "arm,vexpress-temp"; + arm,vexpress-sysreg,func = <4 0>; + label = "FPGA"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, + <0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, + <0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + + /include/ "../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi" + }; +}; diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index d8c0bdc51882..9cb7cf94284a 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -376,10 +376,19 @@ gic0: interrupt-controller@8010,00000000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; + #address-cells = <2>; + #size-cells = <2>; + ranges; interrupt-controller; reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */ <0x8010 0x80000000 0x0 0x600000>; /* GICR */ interrupts = <1 9 0xf04>; + + its: gic-its@8010,00020000 { + compatible = "arm,gic-v3-its"; + msi-controller; + reg = <0x8010 0x20000 0x0 0x200000>; + }; }; uaa0: serial@87e0,24000000 { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f38c94f1d898..4e17e7ede33d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -83,6 +83,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_CEVA=y CONFIG_AHCI_XGENE=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 39248d3adf5d..406485ed110a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -19,6 +19,14 @@ #include <asm/psci.h> #include <asm/smp_plat.h> +/* Macros for consistency checks of the GICC subtable of MADT */ +#define ACPI_MADT_GICC_LENGTH \ + (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ + (entry)->header.length != ACPI_MADT_GICC_LENGTH) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI /* ACPI table mapping after acpi_gbl_permanent_mmap is set */ diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 7ce589ca54a4..9047cab68fd3 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -15,7 +15,7 @@ #include <asm/cpuidle.h> #include <asm/cpu_ops.h> -int arm_cpuidle_init(unsigned int cpu) +int __init arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7691a378668..f860bfda454a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -352,8 +352,8 @@ el1_inv: // TODO: add support for undefined instructions in kernel mode enable_dbg mov x0, sp + mov x2, x1 mov x1, #BAD_SYNC - mrs x2, esr_el1 b bad_mode ENDPROC(el1_sync) @@ -553,7 +553,7 @@ el0_inv: ct_user_exit mov x0, sp mov x1, #BAD_SYNC - mrs x2, esr_el1 + mov x2, x25 bl bad_mode b ret_to_user ENDPROC(el0_sync) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index bd9bfaa9269b..f332d5d1f6b4 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -32,13 +32,11 @@ ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index e7d934d3afe0..7a1a5da6c8c1 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -31,7 +31,6 @@ #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> -#include <asm/kdebug.h> #include <asm/traps.h> #include <asm/cputype.h> #include <asm/system_misc.h> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 702591f6180a..b31e9a4b6275 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1318,7 +1318,7 @@ static int armpmu_device_probe(struct platform_device *pdev) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) - return 0; + goto out; irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) @@ -1340,12 +1340,13 @@ static int armpmu_device_probe(struct platform_device *pdev) if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) break; - of_node_put(dn); if (cpu >= nr_cpu_ids) { pr_warn("Failed to find logical CPU for %s\n", dn->name); + of_node_put(dn); break; } + of_node_put(dn); irqs[i] = cpu; } @@ -1355,6 +1356,7 @@ static int armpmu_device_probe(struct platform_device *pdev) else kfree(irqs); +out: cpu_pmu->plat_device = pdev; return 0; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4b2121bd7f9c..50fb4696654e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -396,13 +396,13 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) { u64 hwid = processor->arm_mpidr; - if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) { - pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid); + if (!(processor->flags & ACPI_MADT_ENABLED)) { + pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); return; } - if (!(processor->flags & ACPI_MADT_ENABLED)) { - pr_err("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); + if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) { + pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid); return; } @@ -438,7 +438,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, struct acpi_madt_generic_interrupt *processor; processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; acpi_table_print_madt_entry(header); @@ -693,7 +693,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -736,7 +736,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a12251c074a8..566bc4c35040 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -335,7 +335,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - if (show_unhandled_signals_ratelimited() && unhandled_signal(current, SIGILL)) { + if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", current->comm, task_pid_nr(current), pc); dump_instr(KERN_INFO, regs); diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 9d84feb41a16..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,5 +4,3 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o - -CFLAGS_mmu.o := -I$(srctree)/scripts/dtc/libfdt/ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b1fc69cd1499..94d98cd1aad8 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -115,7 +115,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, { struct siginfo si; - if (show_unhandled_signals_ratelimited() && unhandled_signal(tsk, sig)) { + if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), fault_name(esr), sig, addr, esr); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cccc4af87a03..831ec534d449 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -33,13 +33,13 @@ int pmd_huge(pmd_t pmd) { - return !(pmd_val(pmd) & PMD_TABLE_BIT); + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } int pud_huge(pud_t pud) { #ifndef __PAGETABLE_PMD_FOLDED - return !(pud_val(pud) & PUD_TABLE_BIT); + return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); #else return 0; #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 82d3435bf14f..a4ede4e2ddd1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -117,7 +117,7 @@ void split_pud(pud_t *old_pud, pmd_t *pmd) int i = 0; do { - set_pmd(pmd, __pmd(addr | prot)); + set_pmd(pmd, __pmd(addr | pgprot_val(prot))); addr += PMD_SIZE; } while (pmd++, i++, i < PTRS_PER_PMD); } diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index de0a81a539a0..98a26ce82d26 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -110,6 +110,10 @@ /* Rd = Rn >> shift; signed */ #define A64_ASR(sf, Rd, Rn, shift) A64_SBFM(sf, Rd, Rn, shift, (sf) ? 63 : 31) +/* Zero extend */ +#define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) +#define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dc6a4842683a..c047598b09e0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -113,9 +113,9 @@ static inline void emit_a64_mov_i(const int is64, const int reg, static inline int bpf2a64_offset(int bpf_to, int bpf_from, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to + 1]; + int to = ctx->offset[bpf_to]; /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from + 1] - 1; + int from = ctx->offset[bpf_from] - 1; return to - from; } @@ -289,23 +289,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_END | BPF_FROM_BE: #ifdef CONFIG_CPU_BIG_ENDIAN if (BPF_SRC(code) == BPF_FROM_BE) - break; + goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ if (BPF_SRC(code) == BPF_FROM_LE) - break; + goto emit_bswap_uxt; #endif switch (imm) { case 16: emit(A64_REV16(is64, dst, dst), ctx); + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: emit(A64_REV32(is64, dst, dst), ctx); + /* upper 32 bits already cleared */ break; case 64: emit(A64_REV64(dst, dst), ctx); break; } break; +emit_bswap_uxt: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit(A64_UXTH(is64, dst, dst), ctx); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit(A64_UXTW(is64, dst, dst), ctx); + break; + case 64: + /* nop */ + break; + } + break; /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: @@ -640,10 +658,11 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + ret = build_insn(insn, ctx); + if (ctx->image == NULL) ctx->offset[i] = ctx->idx; - ret = build_insn(insn, ctx); if (ret > 0) { i++; continue; diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index c36efa0c7163..719dd796c12c 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -136,7 +136,7 @@ void decode_address(char *buf, unsigned long address) struct file *file = vma->vm_file; if (file) { - char *d_name = d_path(&file->f_path, _tmpbuf, + char *d_name = file_path(file, _tmpbuf, sizeof(_tmpbuf)); if (!IS_ERR(d_name)) name = d_name; diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 4dda9bd6b8fb..e989cee77414 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1464,7 +1464,7 @@ static inline void handle_rx_packet(struct sync_port *port) if (port->write_ts_idx == NBR_IN_DESCR) port->write_ts_idx = 0; idx = port->write_ts_idx++; - do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + ktime_get_ts(&port->timestamp[idx]); port->in_buffer_len += port->inbufchunk; } spin_unlock_irqrestore(&port->lock, flags); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7f3028965064..97e48b0eefc7 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -215,10 +215,6 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) pmd_t *pmd; pte_t *pte; - if (!PageReserved(page)) - printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n", - page_address(page)); - pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ { diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2a14585c90d2..aab7e46cadd5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2231,7 +2231,7 @@ config MIPS_CMP config MIPS_CPS bool "MIPS Coherent Processing System support" - depends on SYS_SUPPORTS_MIPS_CPS && !64BIT + depends on SYS_SUPPORTS_MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 37c08a27b4f0..c9f7e231e66b 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea0..16f1ea9ab191 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index c0c5e5972256..d8f9b357b222 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -600,7 +600,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case blezl_op: /* not really i_format */ - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case blez_op: /* @@ -635,7 +635,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case bgtz_op: /* diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 55b759a0019e..1b6ca634e646 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -60,7 +60,7 @@ LEAF(mips_cps_core_entry) nop /* This is an NMI */ - la k0, nmi_handler + PTR_LA k0, nmi_handler jr k0 nop @@ -107,10 +107,10 @@ not_nmi: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 - add a1, a0, t1 + li a0, CKSEG0 + PTR_ADD a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) - add a0, a0, t0 + PTR_ADD a0, a0, t0 bne a0, a1, 1b nop icache_done: @@ -134,12 +134,12 @@ icache_done: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 - addu a1, a0, t1 - subu a1, a1, t0 + li a0, CKSEG0 + PTR_ADDU a1, a0, t1 + PTR_SUBU a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) bne a0, a1, 1b - add a0, a0, t0 + PTR_ADD a0, a0, t0 dcache_done: /* Set Kseg0 CCA to that in s0 */ @@ -152,11 +152,11 @@ dcache_done: /* Enter the coherent domain */ li t0, 0xff - sw t0, GCR_CL_COHERENCE_OFS(v1) + PTR_S t0, GCR_CL_COHERENCE_OFS(v1) ehb /* Jump to kseg0 */ - la t0, 1f + PTR_LA t0, 1f jr t0 nop @@ -178,9 +178,9 @@ dcache_done: nop /* Off we go! */ - lw t1, VPEBOOTCFG_PC(v0) - lw gp, VPEBOOTCFG_GP(v0) - lw sp, VPEBOOTCFG_SP(v0) + PTR_L t1, VPEBOOTCFG_PC(v0) + PTR_L gp, VPEBOOTCFG_GP(v0) + PTR_L sp, VPEBOOTCFG_SP(v0) jr t1 nop END(mips_cps_core_entry) @@ -217,7 +217,7 @@ LEAF(excep_intex) .org 0x480 LEAF(excep_ejtag) - la k0, ejtag_debug_handler + PTR_LA k0, ejtag_debug_handler jr k0 nop END(excep_ejtag) @@ -229,7 +229,7 @@ LEAF(mips_cps_core_init) nop .set push - .set mips32r2 + .set mips64r2 .set mt /* Only allow 1 TC per VPE to execute... */ @@ -237,7 +237,7 @@ LEAF(mips_cps_core_init) /* ...and for the moment only 1 VPE */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop @@ -250,25 +250,25 @@ LEAF(mips_cps_core_init) mfc0 t0, CP0_MVPCONF0 srl t0, t0, MVPCONF0_PVPE_SHIFT andi t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT) - addiu t7, t0, 1 + addiu ta3, t0, 1 /* If there's only 1, we're done */ beqz t0, 2f nop /* Loop through each VPE within this core */ - li t5, 1 + li ta1, 1 1: /* Operate on the appropriate TC */ - mtc0 t5, CP0_VPECONTROL + mtc0 ta1, CP0_VPECONTROL ehb /* Bind TC to VPE (1:1 TC:VPE mapping) */ - mttc0 t5, CP0_TCBIND + mttc0 ta1, CP0_TCBIND /* Set exclusive TC, non-active, master */ li t0, VPECONF0_MVP - sll t1, t5, VPECONF0_XTC_SHIFT + sll t1, ta1, VPECONF0_XTC_SHIFT or t0, t0, t1 mttc0 t0, CP0_VPECONF0 @@ -280,8 +280,8 @@ LEAF(mips_cps_core_init) mttc0 t0, CP0_TCHALT /* Next VPE */ - addiu t5, t5, 1 - slt t0, t5, t7 + addiu ta1, ta1, 1 + slt t0, ta1, ta3 bnez t0, 1b nop @@ -298,19 +298,19 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ - la t0, mips_cm_base - lw t0, 0(t0) + PTR_LA t0, mips_cm_base + PTR_L t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ - lw t0, GCR_CL_ID_OFS(t0) + PTR_L t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 - la t1, mips_cps_core_bootcfg - lw t1, 0(t1) - addu t0, t0, t1 + PTR_LA t1, mips_cps_core_bootcfg + PTR_L t1, 0(t1) + PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ - has_mt t6, 1f + has_mt ta2, 1f li t9, 0 /* Find the number of VPEs present in the core */ @@ -334,24 +334,24 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw t7, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, t7 + PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) + PTR_ADDU v0, v0, ta3 #ifdef CONFIG_MIPS_MT /* If the core doesn't support MT then return */ - bnez t6, 1f + bnez ta2, 1f nop jr ra nop .set push - .set mips32r2 + .set mips64r2 .set mt 1: /* Enter VPE configuration state */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop 1: mfc0 t1, CP0_MVPCONTROL @@ -360,12 +360,12 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw t6, COREBOOTCFG_VPEMASK(t0) - move t8, t6 - li t5, 0 + PTR_L ta2, COREBOOTCFG_VPEMASK(t0) + move t8, ta2 + li ta1, 0 /* Check whether the VPE should be running. If not, skip it */ -1: andi t0, t6, 1 +1: andi t0, ta2, 1 beqz t0, 2f nop @@ -373,7 +373,7 @@ LEAF(mips_cps_boot_vpes) mfc0 t0, CP0_VPECONTROL ori t0, t0, VPECONTROL_TARGTC xori t0, t0, VPECONTROL_TARGTC - or t0, t0, t5 + or t0, t0, ta1 mtc0 t0, CP0_VPECONTROL ehb @@ -384,8 +384,8 @@ LEAF(mips_cps_boot_vpes) /* Calculate a pointer to the VPEs struct vpe_boot_config */ li t0, VPEBOOTCFG_SIZE - mul t0, t0, t5 - addu t0, t0, t7 + mul t0, t0, ta1 + addu t0, t0, ta3 /* Set the TC restart PC */ lw t1, VPEBOOTCFG_PC(t0) @@ -423,9 +423,9 @@ LEAF(mips_cps_boot_vpes) mttc0 t0, CP0_VPECONF0 /* Next VPE */ -2: srl t6, t6, 1 - addiu t5, t5, 1 - bnez t6, 1b +2: srl ta2, ta2, 1 + addiu ta1, ta1, 1 + bnez ta2, 1b nop /* Leave VPE configuration state */ @@ -445,7 +445,7 @@ LEAF(mips_cps_boot_vpes) /* This VPE should be offline, halt the TC */ li t0, TCHALT_H mtc0 t0, CP0_TCHALT - la t0, 1f + PTR_LA t0, 1f 1: jr.hb t0 nop @@ -466,10 +466,10 @@ LEAF(mips_cps_boot_vpes) .set noat lw $1, TI_CPU(gp) sll $1, $1, LONGLOG - la \dest, __per_cpu_offset + PTR_LA \dest, __per_cpu_offset addu $1, $1, \dest lw $1, 0($1) - la \dest, cps_cpu_state + PTR_LA \dest, cps_cpu_state addu \dest, \dest, $1 .set pop .endm diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 6e8de80bb446..4cc13508d967 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -73,10 +73,11 @@ NESTED(handle_sys, PT_SIZE, sp) .set noreorder .set nomacro -1: user_lw(t5, 16(t0)) # argument #5 from usp -4: user_lw(t6, 20(t0)) # argument #6 from usp -3: user_lw(t7, 24(t0)) # argument #7 from usp -2: user_lw(t8, 28(t0)) # argument #8 from usp +load_a4: user_lw(t5, 16(t0)) # argument #5 from usp +load_a5: user_lw(t6, 20(t0)) # argument #6 from usp +load_a6: user_lw(t7, 24(t0)) # argument #7 from usp +load_a7: user_lw(t8, 28(t0)) # argument #8 from usp +loads_done: sw t5, 16(sp) # argument #5 to ksp sw t6, 20(sp) # argument #6 to ksp @@ -85,10 +86,10 @@ NESTED(handle_sys, PT_SIZE, sp) .set pop .section __ex_table,"a" - PTR 1b,bad_stack - PTR 2b,bad_stack - PTR 3b,bad_stack - PTR 4b,bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -153,8 +154,8 @@ syscall_trace_entry: /* ------------------------------------------------------------------------ */ /* - * The stackpointer for a call with more than 4 arguments is bad. - * We probably should handle this case a bit more drastic. + * Our open-coded access area sanity test for the stack pointer + * failed. We probably should handle this case a bit more drastic. */ bad_stack: li v0, EFAULT @@ -163,6 +164,22 @@ bad_stack: sw t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li t5, 0 + b load_a5 + +bad_stack_a5: + li t6, 0 + b load_a6 + +bad_stack_a6: + li t7, 0 + b load_a7 + +bad_stack_a7: + li t8, 0 + b loads_done + /* * The system call does not exist in this kernel */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d07b210fbeff..f543ff4feef9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -69,16 +69,17 @@ NESTED(handle_sys, PT_SIZE, sp) daddu t1, t0, 32 bltz t1, bad_stack -1: lw a4, 16(t0) # argument #5 from usp -2: lw a5, 20(t0) # argument #6 from usp -3: lw a6, 24(t0) # argument #7 from usp -4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls) +load_a4: lw a4, 16(t0) # argument #5 from usp +load_a5: lw a5, 20(t0) # argument #6 from usp +load_a6: lw a6, 24(t0) # argument #7 from usp +load_a7: lw a7, 28(t0) # argument #8 from usp +loads_done: .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - PTR 3b, bad_stack - PTR 4b, bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -167,6 +168,22 @@ bad_stack: sd t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li a4, 0 + b load_a5 + +bad_stack_a5: + li a5, 0 + b load_a6 + +bad_stack_a6: + li a6, 0 + b load_a7 + +bad_stack_a7: + li a7, 0 + b loads_done + not_o32_scall: /* * This is not an o32 compatibility syscall, pass it on @@ -383,7 +400,7 @@ EXPORT(sys32_call_table) PTR sys_connect /* 4170 */ PTR sys_getpeername PTR sys_getsockname - PTR sys_getsockopt + PTR compat_sys_getsockopt PTR sys_listen PTR compat_sys_recv /* 4175 */ PTR compat_sys_recvfrom diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index be73c491182b..008b3378653a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -337,6 +337,11 @@ static void __init bootmem_init(void) min_low_pfn = start; if (end <= reserved_end) continue; +#ifdef CONFIG_BLK_DEV_INITRD + /* mapstart should be after initrd_end */ + if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) + continue; +#endif if (start >= mapstart) continue; mapstart = max(reserved_end, start); @@ -366,14 +371,6 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } -#ifdef CONFIG_BLK_DEV_INITRD - /* - * mapstart should be after initrd_end - */ - if (initrd_end) - mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); -#endif - /* * Initialize the boot-time allocator with low memory only. */ diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 4251d390b5b6..c88937745b4e 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -133,7 +133,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) /* * Patch the start of mips_cps_core_entry to provide: * - * v0 = CM base address + * v1 = CM base address * s0 = kseg0 CCA */ entry_code = (u32 *)&mips_cps_core_entry; @@ -369,7 +369,7 @@ void play_dead(void) static void wait_for_sibling_halt(void *ptr_cpu) { - unsigned cpu = (unsigned)ptr_cpu; + unsigned cpu = (unsigned long)ptr_cpu; unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]); unsigned halted; unsigned long flags; @@ -430,7 +430,7 @@ static void cps_cpu_die(unsigned int cpu) */ err = smp_call_function_single(cpu_death_sibling, wait_for_sibling_halt, - (void *)cpu, 1); + (void *)(unsigned long)cpu, 1); if (err) panic("Failed to call remote sibling CPU\n"); } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9dda..d0744cc77ea7 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. */ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2a7b38ed23f0..e207a43b5f8f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2130,10 +2130,10 @@ void per_cpu_trap_init(bool is_boot_cpu) BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - /* Boot CPU's cache setup in setup_arch(). */ - if (!is_boot_cpu) - cpu_cache_init(); - tlb_init(); + /* Boot CPU's cache setup in setup_arch(). */ + if (!is_boot_cpu) + cpu_cache_init(); + tlb_init(); TLBMISS_HANDLER_SETUP(); } diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c index cc0e4fd548e6..4e116d23bab3 100644 --- a/arch/mips/loongson64/common/bonito-irq.c +++ b/arch/mips/loongson64/common/bonito-irq.c @@ -3,7 +3,7 @@ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index 72fed003a536..01fbed137028 100644 --- a/arch/mips/loongson64/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo <guoyi@ict.ac.cn> * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c index 12c75db23420..875037063a80 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c @@ -1,7 +1,7 @@ /* * CS5536 General timer functions * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 22f04ca2ff3e..f6c44dd332e2 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo <guoyi@ict.ac.cn> * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c index 687003b19b45..d36d969a4a87 100644 --- a/arch/mips/loongson64/common/irq.c +++ b/arch/mips/loongson64/common/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c index d477dd6bb326..2dc5122f0e09 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson64/common/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c index ef5ec8f3de5f..892963f860b7 100644 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ b/arch/mips/loongson64/fuloong-2e/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 462e34d46b4a..a78fb657068c 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public @@ -15,7 +15,7 @@ #include <linux/spinlock.h> #include <asm/clock.h> -#include <asm/mach-loongson/loongson.h> +#include <asm/mach-loongson64/loongson.h> static LIST_HEAD(clock_list); static DEFINE_SPINLOCK(clock_lock); diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 12d14ed48778..6f9e010cec4d 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219..712f17a2ecf2 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; @@ -551,7 +551,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case blezl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case blez_op: @@ -588,7 +588,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case bgtz_op: /* diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 7f660dc67596..fbea4432f3f2 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include <asm/cacheflush.h> /* for run_uncached() */ #include <asm/traps.h> #include <asm/dma-coherence.h> +#include <asm/mips-cm.h> /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed to the other cores. The best we can probably do is + * to restrict that call when a CM is not present because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. + */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } @@ -937,7 +945,9 @@ static void b5k_instruction_hazard(void) } static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", }; static void probe_pcache(void) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..5625b190edc0 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -119,18 +119,24 @@ void read_persistent_clock(struct timespec *ts) int get_c0_fdc_int(void) { - int mips_cpu_fdc_irq; + /* + * Some cores claim the FDC is routable through the GIC, but it doesn't + * actually seem to be connected for those Malta bitstreams. + */ + switch (current_cpu_type()) { + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + return -1; + }; if (cpu_has_veic) - mips_cpu_fdc_irq = -1; + return -1; else if (gic_present) - mips_cpu_fdc_irq = gic_get_c0_fdc_int(); + return gic_get_c0_fdc_int(); else if (cp0_fdc_irq >= 0) - mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq; + return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; else - mips_cpu_fdc_irq = -1; - - return mips_cpu_fdc_irq; + return -1; } int get_c0_perfcount_int(void) diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index d2dc836523a3..8bd8ebb20a72 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -63,13 +63,19 @@ void __init plat_mem_setup(void) plat_setup_iocoherency(); } -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 phys_addr_t mips_cpc_default_phys_base(void) { return DEFAULT_CPC_BASE_ADDR; } +phys_addr_t mips_cdmm_phys_base(void) +{ + return DEFAULT_CDMM_BASE_ADDR; +} + static void __init mips_nmi_setup(void) { void *base; diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..7c73fcb92a10 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -27,6 +27,11 @@ int get_c0_perfcount_int(void) return gic_get_c0_perfcount_int(); } +int get_c0_fdc_int(void) +{ + return gic_get_c0_fdc_int(); +} + void __init plat_time_init(void) { struct device_node *np; diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index be186a75f622..9e3cc8a40ee9 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -19,7 +19,9 @@ #include <linux/io.h> #include <linux/slab.h> -#define ALTERA_TIMER_STATUS_REG 0 +#define ALTR_TIMER_COMPATIBLE "altr,timer-1.0" + +#define ALTERA_TIMER_STATUS_REG 0 #define ALTERA_TIMER_CONTROL_REG 4 #define ALTERA_TIMER_PERIODL_REG 8 #define ALTERA_TIMER_PERIODH_REG 12 @@ -304,7 +306,16 @@ void read_persistent_clock(struct timespec *ts) void __init time_init(void) { + struct device_node *np; + int count = 0; + + for_each_compatible_node(np, NULL, ALTR_TIMER_COMPATIBLE) + count++; + + if (count < 2) + panic("%d timer is found, it needs 2 timers in system\n", count); + clocksource_of_init(); } -CLOCKSOURCE_OF_DECLARE(nios2_timer, "altr,timer-1.0", nios2_time_init); +CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec..f93c4a4e6580 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include <asm/processor.h> #include <asm/cache.h> -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. @@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa0..e84b96478193 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. */ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. */ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b5..cda6dbbe9842 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too... */ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) -{ - unsigned long flags; - - /* Note: purge_tlb_entries can be called at startup with - no context. */ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long npages; + unsigned long flags, size; - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. */ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b..c5ef4081b01d 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 +1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 6548fd1d2e62..b99b39f1da02 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce4..112ccf497562 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -52,6 +52,22 @@ .text /* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + +/* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE * @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6530f1b8874d..37de90f8a845 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6d535973b200..a67c6d781c52 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20631d1..df956295c2a7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(&attr->attr.attr); + attr->var = str; attr->attr.attr.name = name; attr->attr.attr.mode = 0444; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1ba6307be4db..11634fa7ab3c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&d_inode(dir)->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 4949ef0d9400..37f959bf392e 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} - -static DECLARE_WORK(elog_work, elog_work_fn); -static irqreturn_t elog_event(int irq, void *data) -{ - schedule_work(&elog_work); return IRQ_HANDLED; } @@ -304,8 +298,8 @@ int __init opal_elog_init(void) return irq; } - rc = request_irq(irq, elog_event, - IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, rc); diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 46cb3feb0a13..4ece8e40dd54 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file) static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; + pgprot_t page_prot; int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", @@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) if (!opal_prd_range_is_valid(addr, size)) return -EINVAL; - vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, - vma->vm_pgoff, - size, vma->vm_page_prot)) - | _PAGE_SPECIAL); + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot); + page_prot); return rc; } diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 2bc33674ebfc..87f9623ca805 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -18,6 +18,7 @@ #include <linux/pci.h> #include <linux/semaphore.h> #include <asm/msi_bitmap.h> +#include <asm/ppc-pci.h> struct ppc4xx_hsta_msi { struct device *dev; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b98..b2e5902bd8f4 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -62,18 +62,13 @@ static void hypfs_add_dentry(struct dentry *dentry) hypfs_last_dentry = dentry; } -static inline int hypfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static void hypfs_remove(struct dentry *dentry) { struct dentry *parent; parent = dentry->d_parent; mutex_lock(&d_inode(parent)->i_mutex); - if (hypfs_positive(dentry)) { + if (simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(d_inode(parent), dentry); else @@ -456,8 +451,6 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static struct kobject *s390_kobj; - static int __init hypfs_init(void) { int rc; @@ -481,18 +474,16 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_sprp_exit; } - s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); - if (!s390_kobj) { - rc = -ENOMEM; + rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); + if (rc) goto fail_hypfs_diag0c_exit; - } rc = register_filesystem(&hypfs_type); if (rc) goto fail_filesystem; return 0; fail_filesystem: - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: @@ -510,7 +501,7 @@ fail_dbfs_exit: static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); - kobject_put(s390_kobj); + sysfs_remove_mount_point(hypervisor_kobj, "s390"); hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 35d34635e4f1..402b9c85a894 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -332,7 +332,7 @@ static void describe_addr(struct KBacktraceIterator *kbt, } if (vma->vm_file) { - p = d_path(&vma->vm_file->f_path, buf, bufsize); + p = file_path(vma->vm_file, buf, bufsize); if (IS_ERR(p)) p = "?"; name = kbasename(p); diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 88c7016492c4..97bbb6060b25 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -28,7 +28,7 @@ #define _ST(p, inst, v) \ ({ \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ @@ -41,7 +41,7 @@ ({ \ unsigned long __v; \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index f7ddae3725a4..6225cc998db1 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -56,7 +56,7 @@ static int notify_exec(struct mm_struct *mm) if (exe_file == NULL) goto done_free; - path = d_path(&exe_file->f_path, buf, PAGE_SIZE); + path = file_path(exe_file, buf, PAGE_SIZE); if (IS_ERR(path)) goto done_put; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55bced17dc95..3dbb7e7909ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,6 +254,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI @@ -2015,7 +2020,7 @@ config CMDLINE_BOOL To compile command line arguments into the kernel, set this option to 'Y', then fill in the - the boot arguments in CONFIG_CMDLINE. + boot arguments in CONFIG_CMDLINE. Systems with fully functional boot loaders (i.e. non-embedded) should leave this option set to 'N'. diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 99efebb2f69d..ca3ce9ab9385 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); -extern void init_espfix_ap(void); +extern void init_espfix_ap(int cpu); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h new file mode 100644 index 000000000000..200ec2e7821d --- /dev/null +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -0,0 +1,82 @@ +#ifndef _ASM_X86_INTEL_PMC_IPC_H_ +#define _ASM_X86_INTEL_PMC_IPC_H_ + +/* Commands */ +#define PMC_IPC_PMIC_ACCESS 0xFF +#define PMC_IPC_PMIC_ACCESS_READ 0x0 +#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 +#define PMC_IPC_USB_PWR_CTRL 0xF0 +#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF +#define PMC_IPC_PHY_CONFIG 0xEE +#define PMC_IPC_NORTHPEAK_CTRL 0xED +#define PMC_IPC_PM_DEBUG 0xEC +#define PMC_IPC_PMC_TELEMTRY 0xEB +#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA + +/* IPC return code */ +#define IPC_ERR_NONE 0 +#define IPC_ERR_CMD_NOT_SUPPORTED 1 +#define IPC_ERR_CMD_NOT_SERVICED 2 +#define IPC_ERR_UNABLE_TO_SERVICE 3 +#define IPC_ERR_CMD_INVALID 4 +#define IPC_ERR_CMD_FAILED 5 +#define IPC_ERR_EMSECURITY 6 +#define IPC_ERR_UNSIGNEDKERNEL 7 + +#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) + +/* + * intel_pmc_ipc_simple_command + * @cmd: command + * @sub: sub type + */ +int intel_pmc_ipc_simple_command(int cmd, int sub); + +/* + * intel_pmc_ipc_raw_cmd + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + * @sptr: data writing to SPTR register + * @dptr: data writing to DPTR register + */ +int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen, u32 dptr, u32 sptr); + +/* + * intel_pmc_ipc_command + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + */ +int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen); + +#else + +static inline int intel_pmc_ipc_simple_command(int cmd, int sub) +{ + return -EINVAL; +} + +static inline int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen, u32 dptr, u32 sptr) +{ + return -EINVAL; +} + +static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen) +{ + return -EINVAL; +} + +#endif /*CONFIG_INTEL_PMC_IPC*/ + +#endif diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8..74a2a8dc9908 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c7fa57b529d2..2a7f5d782c33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,7 +607,7 @@ struct kvm_arch { struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - int vapics_in_nmi_mode; + atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index ce6068dbcfbc..8fba544e9cc4 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -199,6 +199,17 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 +#define HV_X64_MSR_CRASH_CTL_NOTIFY (1ULL << 63) +#define HV_X64_MSR_CRASH_PARAMS \ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 28eba2d38b15..f813261d9740 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu) int irq, vector; struct apic_chip_data *data; - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { data = apic_chip_data(irq_get_irq_data(irq)); @@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } - raw_spin_unlock(&vector_lock); } /* - * Setup the vector to irq mappings. + * Setup the vector to irq mappings. Must be called with vector_lock held. */ void setup_vector_irq(int cpu) { int irq; + lockdep_assert_held(&vector_lock); /* * On most of the platforms, legacy PIC delivers the interrupts on the * boot cpu. But there are certain platforms where PIC interrupts are diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9fc5e3d9d9c8..922c5e0cea4c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -742,7 +742,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu__init_system(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -754,6 +753,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); setup_force_cpu_cap(X86_FEATURE_ALWAYS); + fpu__init_system(c); } void __init early_cpu_init(void) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5801a14f7524..3658de47900f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -357,34 +357,24 @@ void x86_release_hardware(void) */ int x86_add_exclusive(unsigned int what) { - int ret = -EBUSY, i; - - if (atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) - return 0; + int i; - mutex_lock(&pmc_reserve_mutex); - for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { - if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) - goto out; + if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { + mutex_lock(&pmc_reserve_mutex); + for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { + if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) + goto fail_unlock; + } + atomic_inc(&x86_pmu.lbr_exclusive[what]); + mutex_unlock(&pmc_reserve_mutex); } - atomic_inc(&x86_pmu.lbr_exclusive[what]); - ret = 0; + atomic_inc(&active_events); + return 0; -out: +fail_unlock: mutex_unlock(&pmc_reserve_mutex); - - /* - * Assuming that all exclusive events will share the PMI handler - * (which checks active_events for whether there is work to do), - * we can bump active_events counter right here, except for - * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init() - * path, which already bumps active_events for them. - */ - if (!ret && what != x86_lbr_exclusive_lbr) - atomic_inc(&active_events); - - return ret; + return -EBUSY; } void x86_del_exclusive(unsigned int what) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 89427d8d4fc5..eec40f595ab9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -175,7 +175,9 @@ static __init void early_serial_init(char *s) } if (*s) { - if (kstrtoul(s, 0, &baud) < 0 || baud == 0) + baud = simple_strtoull(s, &e, 0); + + if (baud == 0 || s == e) baud = DEFAULT_BAUD; } diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index f5d0730e7b08..ce95676abd60 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -131,25 +131,24 @@ void __init init_espfix_bsp(void) init_espfix_random(); /* The rest is the same as for any other processor */ - init_espfix_ap(); + init_espfix_ap(0); } -void init_espfix_ap(void) +void init_espfix_ap(int cpu) { - unsigned int cpu, page; + unsigned int page; unsigned long addr; pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; - int n; + int n, node; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ - if (likely(this_cpu_read(espfix_stack))) + if (likely(per_cpu(espfix_stack, cpu))) return; /* Already initialized */ - cpu = smp_processor_id(); addr = espfix_base_addr(cpu); page = cpu/ESPFIX_STACKS_PER_PAGE; @@ -165,12 +164,15 @@ void init_espfix_ap(void) if (stack_page) goto unlock_done; + node = cpu_to_node(cpu); ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) @@ -180,7 +182,9 @@ void init_espfix_ap(void) pmd_p = pmd_offset(&pud, addr); pmd = *pmd_p; if (!pmd_present(pmd)) { - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) @@ -188,7 +192,7 @@ void init_espfix_ap(void) } pte_p = pte_offset_kernel(&pmd, addr); - stack_page = (void *)__get_free_page(GFP_KERNEL); + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); @@ -199,7 +203,7 @@ void init_espfix_ap(void) unlock_done: mutex_unlock(&espfix_init_mutex); done: - this_cpu_write(espfix_stack, addr); - this_cpu_write(espfix_waddr, (unsigned long)stack_page - + (addr & ~PAGE_MASK)); + per_cpu(espfix_stack, cpu) = addr; + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page + + (addr & ~PAGE_MASK); } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index fc878fee6a51..32826791e675 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -95,11 +95,12 @@ static void __init fpu__init_system_mxcsr(void) unsigned int mask = 0; if (cpu_has_fxsr) { - struct fxregs_state fx_tmp __aligned(32) = { }; + /* Static because GCC does not get 16-byte stack alignment right: */ + static struct fxregs_state fxregs __initdata; - asm volatile("fxsave %0" : "+m" (fx_tmp)); + asm volatile("fxsave %0" : "+m" (fxregs)); - mask = fx_tmp.mxcsr_mask; + mask = fxregs.mxcsr_mask; /* * If zero then use the default features mask, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e98..f129a9af6357 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,11 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,12 +178,9 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e5c27f729a38..1d40ca8a73f2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,38 +516,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 88b366487b0e..c7dfe1be784e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void) if (!desc) continue; + /* + * Protect against concurrent action removal, + * affinity changes etc. + */ + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { + raw_spin_unlock(&desc->lock); continue; + } + raw_spin_unlock(&desc->lock); /* * A single irq may be mapped to multiple * cpu's vector_irq[] (for example IOAPIC cluster @@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void) * vector. If the vector is marked in the used vectors * bitmap or an irq is assigned to it, we don't count * it as available. + * + * As this is an inaccurate snapshot anyway, we can do + * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < first_system_vector; vector++) { @@ -486,6 +497,11 @@ void fixup_irqs(void) */ mdelay(1); + /* + * We can walk the vector array of this cpu without holding + * vector_lock because the cpu is already marked !online, so + * nothing else will touch it. + */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; @@ -497,9 +513,9 @@ void fixup_irqs(void) irq = __this_cpu_read(vector_irq[vector]); desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); - raw_spin_lock(&desc->lock); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3b95b89e9b2..80f874bf999e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -461,19 +461,18 @@ static void __init e820_reserve_setup_data(void) { struct setup_data *data; u64 pa_data; - int found = 0; pa_data = boot_params.hdr.setup_data; + if (!pa_data) + return; + while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); e820_update_range(pa_data, sizeof(*data)+data->len, E820_RAM, E820_RESERVED_KERN); - found = 1; pa_data = data->next; early_memunmap(data, sizeof(*data)); } - if (!found) - return; sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); memcpy(&e820_saved, &e820, sizeof(struct e820map)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8add66b22f33..d3010aa79daf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -171,11 +171,6 @@ static void smp_callin(void) apic_ap_setup(); /* - * Need to setup vector mappings before we enable interrupts. - */ - setup_vector_irq(smp_processor_id()); - - /* * Save our processor parameters. Note: this information * is needed for clock calibration. */ @@ -239,18 +234,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(); -#endif - - /* - * We need to hold vector_lock so there the set of online cpus - * does not change while we are assigning vectors to cpus. Holding - * this lock ensures we don't half assign or remove an irq from a cpu. + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with + * vector_lock held to prevent a concurrent setup/teardown + * from seeing a half valid vector space. */ lock_vector_lock(); + setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); @@ -854,6 +844,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(cpu); +#endif + /* So we see what's up */ announce_cpu(cpu, apicid); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..7437b41f6a47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void) if (!pit_expect_msb(0xff-i, &delta, &d2)) break; + delta -= tsc; + + /* + * Extrapolate the error and fail fast if the error will + * never be below 500 ppm. + */ + if (i == 1 && + d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) + return 0; + /* * Iterate until the error is less than 500 ppm */ - delta -= tsc; if (d1+d2 >= delta >> 11) continue; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4dce6f8b6129..f90952f64e79 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,7 +305,7 @@ static void pit_do_work(struct kthread_work *work) * LVT0 to NMI delivery. Other PIC interrupts are just sent to * VCPU0, and only if its LVT0 is in EXTINT mode. */ - if (kvm->arch.vapics_in_nmi_mode > 0) + if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36e9de1b4127..954e98a8c2e3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1257,16 +1257,17 @@ static void start_apic_timer(struct kvm_lapic *apic) static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { - int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); + bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); - if (apic_lvt_nmi_mode(lvt0_val)) { - if (!nmi_wd_enabled) { + if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { + apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; + if (lvt0_in_nmi_mode) { apic_debug("Receive NMI setting on APIC_LVT0 " "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; - } - } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; + atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + } else + atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); + } } static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) @@ -1597,6 +1598,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_spiv(apic, 0xff); @@ -1822,6 +1824,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); apic_update_lvtt(apic); + apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f2f4e10ab772..71952748222a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -26,6 +26,7 @@ struct kvm_lapic { struct kvm_vcpu *vcpu; bool sw_enabled; bool irr_pending; + bool lvt0_in_nmi_mode; /* Number of bits set in ISR. */ s16 isr_count; /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac165c2fb8e5..bbaf44e8f0d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2379,8 +2379,6 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - u64 data; - switch (msr_info->index) { case MSR_IA32_PLATFORM_ID: case MSR_IA32_EBL_CR_POWERON: @@ -2453,7 +2451,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* TSC increment by tick */ msr_info->data = 1000ULL; /* CPU multiplier */ - data |= (((uint64_t)4ULL) << 40); + msr_info->data |= (((uint64_t)4ULL) << 40); break; case MSR_EFER: msr_info->data = vcpu->arch.efer; diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index ddf9ecb53cc3..e342586db6e4 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -20,7 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) unsigned long ret; if (__range_not_ok(from, n, TASK_SIZE)) - return 0; + return n; /* * Even though this function is typically called from NMI/IRQ context diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f..e1840f3db5b5 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) "kasan: " fmt #include <linux/bootmem.h> #include <linux/kasan.h> #include <linux/kdebug.h> @@ -11,7 +12,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +49,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -73,7 +86,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -99,7 +112,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -124,7 +137,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } @@ -166,6 +179,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; @@ -176,6 +209,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -202,5 +236,8 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; + + pr_info("Kernel address sanitizer initialized\n"); } diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index f15db002be8e..114cf48085ab 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -80,6 +80,26 @@ config ACPI_PROCFS_POWER Say N to delete power /proc/acpi/ directories that have moved to /sys/ +config ACPI_REV_OVERRIDE_POSSIBLE + bool "Allow supported ACPI revision to be overriden" + depends on X86 + default y + help + The platform firmware on some systems expects Linux to return "5" as + the supported ACPI revision which makes it expose system configuration + information in a special way. + + For example, based on what ACPI exports as the supported revision, + Dell XPS 13 (2015) configures its audio device to either work in HDA + mode or in I2S mode, where the former is supposed to be used on Linux + until the latter is fully supported (in the kernel as well as in user + space). + + This option enables a DMI-based quirk for the above Dell machine (so + that HDA audio is exposed by the platform firmware to the kernel) and + makes it possible to force the kernel to return "5" as the supported + ACPI revision via the "acpi_rev_override" command line switch. + config ACPI_EC_DEBUGFS tristate "EC read/write access through /sys/kernel/debug/ec" default n diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 569ee090343f..46b58abb08c5 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h index 853aa2dbdb61..a8d8092ee391 100644 --- a/drivers/acpi/acpica/accommon.h +++ b/drivers/acpi/acpica/accommon.h @@ -59,5 +59,8 @@ #include "acglobal.h" /* All global variables */ #include "achware.h" /* Hardware defines and interfaces */ #include "acutils.h" /* Utility interfaces */ +#ifndef ACPI_USE_SYSTEM_CLIBRARY +#include "acclib.h" /* C library interfaces */ +#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ #endif /* __ACCOMMON_H__ */ diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index a0c478784314..53f96a370762 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -61,6 +61,8 @@ ACPI_GLOBAL(struct acpi_table_header, acpi_gbl_original_dsdt_header); #if (!ACPI_REDUCED_HARDWARE) ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_FACS); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs32); +ACPI_GLOBAL(struct acpi_table_facs *, acpi_gbl_facs64); #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index 1886bde54b5d..7ac98000b46b 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -468,6 +468,8 @@ void acpi_ex_eisa_id_to_string(char *dest, u64 compressed_id); void acpi_ex_integer_to_string(char *dest, u64 value); +void acpi_ex_pci_cls_to_string(char *dest, u8 class_code[3]); + u8 acpi_is_valid_space_id(u8 space_id); /* diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index ffdb956391f6..bc600969c6a1 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -213,6 +213,7 @@ struct acpi_table_list { #define ACPI_TABLE_INDEX_DSDT (0) #define ACPI_TABLE_INDEX_FACS (1) +#define ACPI_TABLE_INDEX_X_FACS (2) struct acpi_find_context { char *search_for; diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 952fbe0b7231..0dd088290d80 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -66,6 +66,7 @@ #define ACPI_NS_PREFIX_IS_SCOPE 0x10 #define ACPI_NS_EXTERNAL 0x20 #define ACPI_NS_TEMPORARY 0x40 +#define ACPI_NS_OVERRIDE_IF_FOUND 0x80 /* Flags for acpi_ns_walk_namespace */ diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 3e9720e1f34f..c81d98d09cac 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -335,6 +335,7 @@ struct acpi_object_reference { void *object; /* name_op=>HANDLE to obj, index_op=>union acpi_operand_object */ struct acpi_namespace_node *node; /* ref_of or Namepath */ union acpi_operand_object **where; /* Target of Index */ + u8 *index_pointer; /* Used for Buffers and Strings */ u32 value; /* Used for Local/Arg/Index/ddb_handle */ }; diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h index 87c7860b3394..44997ca02ae2 100644 --- a/drivers/acpi/acpica/acstruct.h +++ b/drivers/acpi/acpica/acstruct.h @@ -82,6 +82,7 @@ struct acpi_walk_state { u8 return_used; u8 scope_depth; u8 pass_number; /* Parse pass during table load */ + u8 namespace_override; /* Override existing objects */ u8 result_size; /* Total elements for the result stack */ u8 result_count; /* Current number of occupied elements of result stack */ u32 aml_offset; diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index d49f5c7a20d9..6de0d3573037 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -205,66 +205,6 @@ acpi_status acpi_ut_hardware_initialize(void); void acpi_ut_subsystem_shutdown(void); -/* - * utclib - Local implementations of C library functions - */ -#ifndef ACPI_USE_SYSTEM_CLIBRARY - -acpi_size acpi_ut_strlen(const char *string); - -char *acpi_ut_strchr(const char *string, int ch); - -char *acpi_ut_strcpy(char *dst_string, const char *src_string); - -char *acpi_ut_strncpy(char *dst_string, - const char *src_string, acpi_size count); - -int acpi_ut_memcmp(const char *buffer1, const char *buffer2, acpi_size count); - -int acpi_ut_strncmp(const char *string1, const char *string2, acpi_size count); - -int acpi_ut_strcmp(const char *string1, const char *string2); - -char *acpi_ut_strcat(char *dst_string, const char *src_string); - -char *acpi_ut_strncat(char *dst_string, - const char *src_string, acpi_size count); - -u32 acpi_ut_strtoul(const char *string, char **terminator, u32 base); - -char *acpi_ut_strstr(char *string1, char *string2); - -void *acpi_ut_memcpy(void *dest, const void *src, acpi_size count); - -void *acpi_ut_memset(void *dest, u8 value, acpi_size count); - -int acpi_ut_to_upper(int c); - -int acpi_ut_to_lower(int c); - -extern const u8 _acpi_ctype[]; - -#define _ACPI_XA 0x00 /* extra alphabetic - not supported */ -#define _ACPI_XS 0x40 /* extra space */ -#define _ACPI_BB 0x00 /* BEL, BS, etc. - not supported */ -#define _ACPI_CN 0x20 /* CR, FF, HT, NL, VT */ -#define _ACPI_DI 0x04 /* '0'-'9' */ -#define _ACPI_LO 0x02 /* 'a'-'z' */ -#define _ACPI_PU 0x10 /* punctuation */ -#define _ACPI_SP 0x08 /* space, tab, CR, LF, VT, FF */ -#define _ACPI_UP 0x01 /* 'A'-'Z' */ -#define _ACPI_XD 0x80 /* '0'-'9', 'A'-'F', 'a'-'f' */ - -#define ACPI_IS_DIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_DI)) -#define ACPI_IS_SPACE(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_SP)) -#define ACPI_IS_XDIGIT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_XD)) -#define ACPI_IS_UPPER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_UP)) -#define ACPI_IS_LOWER(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO)) -#define ACPI_IS_PRINT(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP | _ACPI_DI | _ACPI_XS | _ACPI_PU)) -#define ACPI_IS_ALPHA(c) (_acpi_ctype[(unsigned char)(c)] & (_ACPI_LO | _ACPI_UP)) - -#endif /* !ACPI_USE_SYSTEM_CLIBRARY */ - #define ACPI_IS_ASCII(c) ((c) < 0x80) /* @@ -430,6 +370,10 @@ acpi_status acpi_ut_execute_CID(struct acpi_namespace_node *device_node, struct acpi_pnp_device_id_list ** return_cid_list); +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id); + /* * utlock - reader/writer locks */ diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 43b40de90484..20de148594fd 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -502,7 +502,7 @@ acpi_ds_create_field(union acpi_parse_object *op, } } - ACPI_MEMSET(&info, 0, sizeof(struct acpi_create_field_info)); + memset(&info, 0, sizeof(struct acpi_create_field_info)); /* Second arg is the field flags */ diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index bbe74bcebbae..95779e8ec3bb 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -207,7 +207,7 @@ acpi_ds_initialize_objects(u32 table_index, /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); info.owner_id = owner_id; info.table_index = table_index; diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index 8a7b07b6adc8..2beb7fd674ae 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -339,8 +339,8 @@ acpi_ds_build_internal_buffer_obj(struct acpi_walk_state *walk_state, /* Initialize buffer from the byte_list (if present) */ if (byte_list) { - ACPI_MEMCPY(obj_desc->buffer.pointer, - byte_list->named.data, byte_list_length); + memcpy(obj_desc->buffer.pointer, byte_list->named.data, + byte_list_length); } } @@ -750,8 +750,7 @@ acpi_ds_init_object_from_op(struct acpi_walk_state *walk_state, case ACPI_TYPE_STRING: obj_desc->string.pointer = op->common.value.string; - obj_desc->string.length = - (u32) ACPI_STRLEN(op->common.value.string); + obj_desc->string.length = (u32)strlen(op->common.value.string); /* * The string is contained in the ACPI table, don't ever try diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c index deeddd6d2f05..ebc577baeaf9 100644 --- a/drivers/acpi/acpica/dsutils.c +++ b/drivers/acpi/acpica/dsutils.c @@ -572,8 +572,8 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state, obj_desc = acpi_ut_create_string_object((acpi_size) name_length); - ACPI_STRNCPY(obj_desc->string.pointer, - name_string, name_length); + strncpy(obj_desc->string.pointer, + name_string, name_length); status = AE_OK; } else { /* diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 843942fb4be5..845ff44919c3 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -315,10 +315,19 @@ acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state, flags = ACPI_NS_NO_UPSEARCH; if ((walk_state->opcode != AML_SCOPE_OP) && (!(walk_state->parse_flags & ACPI_PARSE_DEFERRED_OP))) { - flags |= ACPI_NS_ERROR_IF_FOUND; - ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, - "[%s] Cannot already exist\n", - acpi_ut_get_type_name(object_type))); + if (walk_state->namespace_override) { + flags |= ACPI_NS_OVERRIDE_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Override allowed\n", + acpi_ut_get_type_name + (object_type))); + } else { + flags |= ACPI_NS_ERROR_IF_FOUND; + ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, + "[%s] Cannot already exist\n", + acpi_ut_get_type_name + (object_type))); + } } else { ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "[%s] Both Find or Create allowed\n", diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c index 8840296d5b20..ea4c0d3fca2d 100644 --- a/drivers/acpi/acpica/evgpeinit.c +++ b/drivers/acpi/acpica/evgpeinit.c @@ -377,7 +377,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle, /* 4) The last two characters of the name are the hex GPE Number */ - gpe_number = ACPI_STRTOUL(&name[2], NULL, 16); + gpe_number = strtoul(&name[2], NULL, 16); if (gpe_number == ACPI_UINT32_MAX) { /* Conversion failed; invalid method, just ignore it */ diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 6e0df2b9d5a4..24a4c5c2b124 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -470,7 +470,7 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_NO_MEMORY); } - ACPI_MEMCPY(table, table_header, length); + memcpy(table, table_header, length); break; default: diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c index 89a976b4ccf2..075d654c837f 100644 --- a/drivers/acpi/acpica/exconvrt.c +++ b/drivers/acpi/acpica/exconvrt.c @@ -227,9 +227,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the integer to the buffer, LSB first */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, - &obj_desc->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, + &obj_desc->integer.value, acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -252,8 +251,8 @@ acpi_ex_convert_to_buffer(union acpi_operand_object *obj_desc, /* Copy the string to the buffer */ new_buf = return_desc->buffer.pointer; - ACPI_STRNCPY((char *)new_buf, (char *)obj_desc->string.pointer, - obj_desc->string.length); + strncpy((char *)new_buf, (char *)obj_desc->string.pointer, + obj_desc->string.length); break; default: diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c index e67d0aca3fe6..815442bbd051 100644 --- a/drivers/acpi/acpica/exdebug.c +++ b/drivers/acpi/acpica/exdebug.c @@ -76,6 +76,8 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, { u32 i; u32 timer; + union acpi_operand_object *object_desc; + u32 value; ACPI_FUNCTION_TRACE_PTR(ex_do_debug_object, source_desc); @@ -254,8 +256,44 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc, object)->object, level + 4, 0); } else { - acpi_ex_do_debug_object(source_desc->reference. - object, level + 4, 0); + object_desc = source_desc->reference.object; + value = source_desc->reference.value; + + switch (object_desc->common.type) { + case ACPI_TYPE_BUFFER: + + acpi_os_printf("Buffer[%u] = 0x%2.2X\n", + value, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_STRING: + + acpi_os_printf + ("String[%u] = \"%c\" (0x%2.2X)\n", + value, + *source_desc->reference. + index_pointer, + *source_desc->reference. + index_pointer); + break; + + case ACPI_TYPE_PACKAGE: + + acpi_os_printf("Package[%u] = ", value); + acpi_ex_do_debug_object(*source_desc-> + reference.where, + level + 4, 0); + break; + + default: + + acpi_os_printf + ("Unknown Reference object type %X\n", + object_desc->common.type); + break; + } } } break; diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index 1da52bef632e..401e7edcd419 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -224,7 +224,7 @@ static struct acpi_exdump_info acpi_ex_dump_index_field[5] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(index_field.data_obj), "Data Object"} }; -static struct acpi_exdump_info acpi_ex_dump_reference[8] = { +static struct acpi_exdump_info acpi_ex_dump_reference[9] = { {ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_reference), NULL}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.class), "Class"}, {ACPI_EXD_UINT8, ACPI_EXD_OFFSET(reference.target_type), "Target Type"}, @@ -232,6 +232,8 @@ static struct acpi_exdump_info acpi_ex_dump_reference[8] = { {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.object), "Object Desc"}, {ACPI_EXD_NODE, ACPI_EXD_OFFSET(reference.node), "Node"}, {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.where), "Where"}, + {ACPI_EXD_POINTER, ACPI_EXD_OFFSET(reference.index_pointer), + "Index Pointer"}, {ACPI_EXD_REFERENCE, 0, NULL} }; @@ -1005,14 +1007,13 @@ static void acpi_ex_dump_reference_obj(union acpi_operand_object *obj_desc) } else if (obj_desc->reference.object) { if (ACPI_GET_DESCRIPTOR_TYPE(obj_desc) == ACPI_DESC_TYPE_OPERAND) { - acpi_os_printf(" Target: %p", + acpi_os_printf("%22s %p", "Target :", obj_desc->reference.object); if (obj_desc->reference.class == ACPI_REFCLASS_TABLE) { acpi_os_printf(" Table Index: %X\n", obj_desc->reference.value); } else { - acpi_os_printf(" Target: %p [%s]\n", - obj_desc->reference.object, + acpi_os_printf(" [%s]\n", acpi_ut_get_type_name(((union acpi_operand_object *) diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index c161dd974f74..61fd9c7b88bc 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -428,7 +428,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); + memcpy(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 725a3746a2df..70b7bbbb860b 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -416,22 +416,22 @@ acpi_ex_field_datum_io(union acpi_operand_object *obj_desc, * Copy the data from the source buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY(value, - (obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, - obj_desc->common_field.access_byte_width); + memcpy(value, + (obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, + obj_desc->common_field.access_byte_width); } else { /* * Copy the data to the target buffer. * Length is the field width in bytes. */ - ACPI_MEMCPY((obj_desc->buffer_field.buffer_obj)->buffer. - pointer + - obj_desc->buffer_field.base_byte_offset + - field_datum_byte_offset, value, - obj_desc->common_field.access_byte_width); + memcpy((obj_desc->buffer_field.buffer_obj)->buffer. + pointer + + obj_desc->buffer_field.base_byte_offset + + field_datum_byte_offset, value, + obj_desc->common_field.access_byte_width); } status = AE_OK; @@ -703,7 +703,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, return_ACPI_STATUS(AE_BUFFER_OVERFLOW); } - ACPI_MEMSET(buffer, 0, buffer_length); + memset(buffer, 0, buffer_length); access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width); /* Handle the simple case here */ @@ -720,7 +720,7 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, status = acpi_ex_field_datum_io(obj_desc, 0, &raw_datum, ACPI_READ); - ACPI_MEMCPY(buffer, &raw_datum, buffer_length); + memcpy(buffer, &raw_datum, buffer_length); } return_ACPI_STATUS(status); @@ -793,9 +793,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write merged datum to target buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); buffer_offset += obj_desc->common_field.access_byte_width; merged_datum = @@ -811,9 +811,9 @@ acpi_ex_extract_from_field(union acpi_operand_object *obj_desc, /* Write the last datum to the buffer */ - ACPI_MEMCPY(((char *)buffer) + buffer_offset, &merged_datum, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(((char *)buffer) + buffer_offset, &merged_datum, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); return_ACPI_STATUS(AE_OK); } @@ -878,7 +878,7 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, * at Byte zero. All unused (upper) bytes of the * buffer will be 0. */ - ACPI_MEMCPY((char *)new_buffer, (char *)buffer, buffer_length); + memcpy((char *)new_buffer, (char *)buffer, buffer_length); buffer = new_buffer; buffer_length = required_length; } @@ -918,9 +918,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get initial Datum from the input buffer */ - ACPI_MEMCPY(&raw_datum, buffer, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, buffer, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum = raw_datum << obj_desc->common_field.start_field_bit_offset; @@ -970,9 +970,9 @@ acpi_ex_insert_into_field(union acpi_operand_object *obj_desc, /* Get the next input datum from the buffer */ buffer_offset += obj_desc->common_field.access_byte_width; - ACPI_MEMCPY(&raw_datum, ((char *)buffer) + buffer_offset, - ACPI_MIN(obj_desc->common_field.access_byte_width, - buffer_length - buffer_offset)); + memcpy(&raw_datum, ((char *)buffer) + buffer_offset, + ACPI_MIN(obj_desc->common_field.access_byte_width, + buffer_length - buffer_offset)); merged_datum |= raw_datum << obj_desc->common_field.start_field_bit_offset; diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c index b56fc9d6f48e..d02afece0f10 100644 --- a/drivers/acpi/acpica/exmisc.c +++ b/drivers/acpi/acpica/exmisc.c @@ -209,8 +209,8 @@ acpi_ex_concat_template(union acpi_operand_object *operand0, * end_tag descriptor is copied from Operand1. */ new_buf = return_desc->buffer.pointer; - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, length0); - ACPI_MEMCPY(new_buf + length0, operand1->buffer.pointer, length1); + memcpy(new_buf, operand0->buffer.pointer, length0); + memcpy(new_buf + length0, operand1->buffer.pointer, length1); /* Insert end_tag and set the checksum to zero, means "ignore checksum" */ @@ -318,14 +318,14 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Copy the first integer, LSB first */ - ACPI_MEMCPY(new_buf, &operand0->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf, &operand0->integer.value, + acpi_gbl_integer_byte_width); /* Copy the second integer (LSB first) after the first */ - ACPI_MEMCPY(new_buf + acpi_gbl_integer_byte_width, - &local_operand1->integer.value, - acpi_gbl_integer_byte_width); + memcpy(new_buf + acpi_gbl_integer_byte_width, + &local_operand1->integer.value, + acpi_gbl_integer_byte_width); break; case ACPI_TYPE_STRING: @@ -346,9 +346,9 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the strings */ - ACPI_STRCPY(new_buf, operand0->string.pointer); - ACPI_STRCPY(new_buf + operand0->string.length, - local_operand1->string.pointer); + strcpy(new_buf, operand0->string.pointer); + strcpy(new_buf + operand0->string.length, + local_operand1->string.pointer); break; case ACPI_TYPE_BUFFER: @@ -369,11 +369,11 @@ acpi_ex_do_concatenate(union acpi_operand_object *operand0, /* Concatenate the buffers */ - ACPI_MEMCPY(new_buf, operand0->buffer.pointer, - operand0->buffer.length); - ACPI_MEMCPY(new_buf + operand0->buffer.length, - local_operand1->buffer.pointer, - local_operand1->buffer.length); + memcpy(new_buf, operand0->buffer.pointer, + operand0->buffer.length); + memcpy(new_buf + operand0->buffer.length, + local_operand1->buffer.pointer, + local_operand1->buffer.length); break; default: @@ -660,9 +660,9 @@ acpi_ex_do_logical_op(u16 opcode, /* Lexicographic compare: compare the data bytes */ - compare = ACPI_MEMCMP(operand0->buffer.pointer, - local_operand1->buffer.pointer, - (length0 > length1) ? length1 : length0); + compare = memcmp(operand0->buffer.pointer, + local_operand1->buffer.pointer, + (length0 > length1) ? length1 : length0); switch (opcode) { case AML_LEQUAL_OP: /* LEqual (Operand0, Operand1) */ diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c index 453b00c30177..20e87813c7d7 100644 --- a/drivers/acpi/acpica/exnames.c +++ b/drivers/acpi/acpica/exnames.c @@ -192,7 +192,7 @@ static acpi_status acpi_ex_name_segment(u8 ** in_aml_address, char *name_string) char_buf[4] = '\0'; if (name_string) { - ACPI_STRCAT(name_string, char_buf); + strcat(name_string, char_buf); ACPI_DEBUG_PRINT((ACPI_DB_NAMES, "Appended to - %s\n", name_string)); } else { diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c index fcc618aa2061..b8944ebb1081 100644 --- a/drivers/acpi/acpica/exoparg2.c +++ b/drivers/acpi/acpica/exoparg2.c @@ -337,8 +337,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) * Copy the raw buffer data with no transform. * (NULL terminated already) */ - ACPI_MEMCPY(return_desc->string.pointer, - operand[0]->buffer.pointer, length); + memcpy(return_desc->string.pointer, + operand[0]->buffer.pointer, length); break; case AML_CONCAT_RES_OP: @@ -380,6 +380,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_BUFFER: @@ -391,6 +393,8 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state) return_desc->reference.target_type = ACPI_TYPE_BUFFER_FIELD; + return_desc->reference.index_pointer = + &(operand[0]->buffer.pointer[index]); break; case ACPI_TYPE_PACKAGE: diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c index 1c64a988cbee..fa100b3b92ee 100644 --- a/drivers/acpi/acpica/exoparg3.c +++ b/drivers/acpi/acpica/exoparg3.c @@ -237,8 +237,8 @@ acpi_status acpi_ex_opcode_3A_1T_1R(struct acpi_walk_state *walk_state) /* We have a buffer, copy the portion requested */ - ACPI_MEMCPY(buffer, operand[0]->string.pointer + index, - length); + memcpy(buffer, operand[0]->string.pointer + index, + length); } /* Set the length of the new String/Buffer */ diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index f6c2f5499935..b4a5e44c00dd 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -517,15 +517,14 @@ acpi_ex_data_table_space_handler(u32 function, switch (function) { case ACPI_READ: - ACPI_MEMCPY(ACPI_CAST_PTR(char, value), - ACPI_PHYSADDR_TO_PTR(address), - ACPI_DIV_8(bit_width)); + memcpy(ACPI_CAST_PTR(char, value), + ACPI_PHYSADDR_TO_PTR(address), ACPI_DIV_8(bit_width)); break; case ACPI_WRITE: - ACPI_MEMCPY(ACPI_PHYSADDR_TO_PTR(address), - ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); + memcpy(ACPI_PHYSADDR_TO_PTR(address), + ACPI_CAST_PTR(char, value), ACPI_DIV_8(bit_width)); break; default: diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c index 6fa3c8d8fc5f..e1d4f4d51b97 100644 --- a/drivers/acpi/acpica/exstorob.c +++ b/drivers/acpi/acpica/exstorob.c @@ -100,9 +100,9 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, /* Clear existing buffer and copy in the new one */ - ACPI_MEMSET(target_desc->buffer.pointer, 0, - target_desc->buffer.length); - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, length); + memset(target_desc->buffer.pointer, 0, + target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, length); #ifdef ACPI_OBSOLETE_BEHAVIOR /* @@ -129,8 +129,8 @@ acpi_ex_store_buffer_to_buffer(union acpi_operand_object *source_desc, } else { /* Truncate the source, copy only what will fit */ - ACPI_MEMCPY(target_desc->buffer.pointer, buffer, - target_desc->buffer.length); + memcpy(target_desc->buffer.pointer, buffer, + target_desc->buffer.length); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Truncating source buffer from %X to %X\n", @@ -187,9 +187,9 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, * String will fit in existing non-static buffer. * Clear old string and copy in the new one */ - ACPI_MEMSET(target_desc->string.pointer, 0, - (acpi_size) target_desc->string.length + 1); - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memset(target_desc->string.pointer, 0, + (acpi_size) target_desc->string.length + 1); + memcpy(target_desc->string.pointer, buffer, length); } else { /* * Free the current buffer, then allocate a new buffer @@ -210,7 +210,7 @@ acpi_ex_store_string_to_string(union acpi_operand_object *source_desc, } target_desc->common.flags &= ~AOPOBJ_STATIC_POINTER; - ACPI_MEMCPY(target_desc->string.pointer, buffer, length); + memcpy(target_desc->string.pointer, buffer, length); } /* Set the new target length */ diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 3f4225e95d93..30c3f464fda5 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -380,6 +380,38 @@ void acpi_ex_integer_to_string(char *out_string, u64 value) /******************************************************************************* * + * FUNCTION: acpi_ex_pci_cls_to_string + * + * PARAMETERS: out_string - Where to put the converted string (7 bytes) + * PARAMETERS: class_code - PCI class code to be converted (3 bytes) + * + * RETURN: None + * + * DESCRIPTION: Convert 3-bytes PCI class code to string representation. + * Return buffer must be large enough to hold the string. The + * string returned is always exactly of length + * ACPI_PCICLS_STRING_SIZE (includes null terminator). + * + ******************************************************************************/ + +void acpi_ex_pci_cls_to_string(char *out_string, u8 class_code[3]) +{ + + ACPI_FUNCTION_ENTRY(); + + /* All 3 bytes are hexadecimal */ + + out_string[0] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 4); + out_string[1] = acpi_ut_hex_to_ascii_char((u64)class_code[0], 0); + out_string[2] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 4); + out_string[3] = acpi_ut_hex_to_ascii_char((u64)class_code[1], 0); + out_string[4] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 4); + out_string[5] = acpi_ut_hex_to_ascii_char((u64)class_code[2], 0); + out_string[6] = 0; +} + +/******************************************************************************* + * * FUNCTION: acpi_is_valid_space_id * * PARAMETERS: space_id - ID to be validated diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index 3b3767698827..52dfd0d050fa 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -50,6 +50,13 @@ ACPI_MODULE_NAME("hwxfsleep") /* Local prototypes */ +#if (!ACPI_REDUCED_HARDWARE) +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64); +#endif + static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id); /* @@ -72,6 +79,7 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { /* * These functions are removed for the ACPI_REDUCED_HARDWARE case: + * acpi_set_firmware_waking_vectors * acpi_set_firmware_waking_vector * acpi_set_firmware_waking_vector64 * acpi_enter_sleep_state_s4bios @@ -80,20 +88,26 @@ static struct acpi_sleep_functions acpi_sleep_dispatch[] = { #if (!ACPI_REDUCED_HARDWARE) /******************************************************************************* * - * FUNCTION: acpi_set_firmware_waking_vector + * FUNCTION: acpi_hw_set_firmware_waking_vectors * - * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * PARAMETERS: facs - Pointer to FACS table + * physical_address - 32-bit physical address of ACPI real mode * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. * * RETURN: Status * - * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS * ******************************************************************************/ -acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +static acpi_status +acpi_hw_set_firmware_waking_vectors(struct acpi_table_facs *facs, + acpi_physical_address physical_address, + acpi_physical_address physical_address64) { - ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + ACPI_FUNCTION_TRACE(acpi_hw_set_firmware_waking_vectors); /* @@ -106,17 +120,92 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address) /* Set the 32-bit vector */ - acpi_gbl_FACS->firmware_waking_vector = physical_address; + facs->firmware_waking_vector = (u32)physical_address; - /* Clear the 64-bit vector if it exists */ + if (facs->length > 32) { + if (facs->version >= 1) { - if ((acpi_gbl_FACS->length > 32) && (acpi_gbl_FACS->version >= 1)) { - acpi_gbl_FACS->xfirmware_waking_vector = 0; + /* Set the 64-bit vector */ + + facs->xfirmware_waking_vector = physical_address64; + } else { + /* Clear the 64-bit vector if it exists */ + + facs->xfirmware_waking_vector = 0; + } } return_ACPI_STATUS(AE_OK); } +/******************************************************************************* + * + * FUNCTION: acpi_set_firmware_waking_vectors + * + * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * entry point. + * physical_address64 - 64-bit physical address of ACPI protected + * mode entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the firmware_waking_vector fields of the FACS + * + ******************************************************************************/ + +acpi_status +acpi_set_firmware_waking_vectors(acpi_physical_address physical_address, + acpi_physical_address physical_address64) +{ + + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vectors); + + /* If Hardware Reduced flag is set, there is no FACS */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS (AE_OK); + } + + if (acpi_gbl_facs32) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs32, + physical_address, + physical_address64); + } + if (acpi_gbl_facs64) { + (void)acpi_hw_set_firmware_waking_vectors(acpi_gbl_facs64, + physical_address, + physical_address64); + } + + return_ACPI_STATUS(AE_OK); +} + +ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vectors) + +/******************************************************************************* + * + * FUNCTION: acpi_set_firmware_waking_vector + * + * PARAMETERS: physical_address - 32-bit physical address of ACPI real mode + * entry point. + * + * RETURN: Status + * + * DESCRIPTION: Sets the 32-bit firmware_waking_vector field of the FACS + * + ******************************************************************************/ +acpi_status acpi_set_firmware_waking_vector(u32 physical_address) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector); + + status = acpi_set_firmware_waking_vectors((acpi_physical_address) + physical_address, 0); + + return_ACPI_STATUS(status); +} + ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) #if ACPI_MACHINE_WIDTH == 64 @@ -136,25 +225,19 @@ ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector) ******************************************************************************/ acpi_status acpi_set_firmware_waking_vector64(u64 physical_address) { - ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - - - /* Determine if the 64-bit vector actually exists */ + acpi_status status; - if ((acpi_gbl_FACS->length <= 32) || (acpi_gbl_FACS->version < 1)) { - return_ACPI_STATUS(AE_NOT_EXIST); - } + ACPI_FUNCTION_TRACE(acpi_set_firmware_waking_vector64); - /* Clear 32-bit vector, set the 64-bit X_ vector */ + status = acpi_set_firmware_waking_vectors(0, + (acpi_physical_address) + physical_address); - acpi_gbl_FACS->firmware_waking_vector = 0; - acpi_gbl_FACS->xfirmware_waking_vector = physical_address; - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(status); } ACPI_EXPORT_SYMBOL(acpi_set_firmware_waking_vector64) #endif - /******************************************************************************* * * FUNCTION: acpi_enter_sleep_state_s4bios diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 24fa19a76d70..c687b9979fb2 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -102,7 +102,7 @@ acpi_status acpi_ns_root_initialize(void) /* _OSI is optional for now, will be permanent later */ - if (!ACPI_STRCMP(init_val->name, "_OSI") + if (!strcmp(init_val->name, "_OSI") && !acpi_gbl_create_osi_method) { continue; } @@ -180,7 +180,7 @@ acpi_status acpi_ns_root_initialize(void) /* Build an object around the static string */ - obj_desc->string.length = (u32)ACPI_STRLEN(val); + obj_desc->string.length = (u32)strlen(val); obj_desc->string.pointer = val; obj_desc->common.flags |= AOPOBJ_STATIC_POINTER; break; @@ -203,7 +203,7 @@ acpi_status acpi_ns_root_initialize(void) /* Special case for ACPI Global Lock */ - if (ACPI_STRCMP(init_val->name, "_GL_") == 0) { + if (strcmp(init_val->name, "_GL_") == 0) { acpi_gbl_global_lock_mutex = obj_desc; /* Create additional counting semaphore for global lock */ @@ -304,7 +304,9 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, return_ACPI_STATUS(AE_BAD_PARAMETER); } - local_flags = flags & ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_SEARCH_PARENT); + local_flags = flags & + ~(ACPI_NS_ERROR_IF_FOUND | ACPI_NS_OVERRIDE_IF_FOUND | + ACPI_NS_SEARCH_PARENT); *return_node = ACPI_ENTRY_NOT_FOUND; acpi_gbl_ns_lookup_count++; @@ -547,6 +549,12 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, if (flags & ACPI_NS_ERROR_IF_FOUND) { local_flags |= ACPI_NS_ERROR_IF_FOUND; } + + /* Set override flag according to caller */ + + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + local_flags |= ACPI_NS_OVERRIDE_IF_FOUND; + } } /* Extract one ACPI name from the front of the pathname */ diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c index 1a8b39c8d969..da55a1c60da1 100644 --- a/drivers/acpi/acpica/nsconvert.c +++ b/drivers/acpi/acpica/nsconvert.c @@ -187,8 +187,8 @@ acpi_ns_convert_to_string(union acpi_operand_object *original_object, * Copy the raw buffer data with no transform. String is already NULL * terminated at Length+1. */ - ACPI_MEMCPY(new_object->string.pointer, - original_object->buffer.pointer, length); + memcpy(new_object->string.pointer, + original_object->buffer.pointer, length); break; default: @@ -251,9 +251,9 @@ acpi_ns_convert_to_buffer(union acpi_operand_object *original_object, return (AE_NO_MEMORY); } - ACPI_MEMCPY(new_object->buffer.pointer, - original_object->string.pointer, - original_object->string.length); + memcpy(new_object->buffer.pointer, + original_object->string.pointer, + original_object->string.length); break; case ACPI_TYPE_PACKAGE: diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index d259393505fa..0f1daba640e7 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -101,7 +101,7 @@ void acpi_ns_print_pathname(u32 num_segments, char *pathname) while (num_segments) { for (i = 0; i < 4; i++) { - ACPI_IS_PRINT(pathname[i]) ? + isprint((int)pathname[i]) ? acpi_os_printf("%c", pathname[i]) : acpi_os_printf("?"); } diff --git a/drivers/acpi/acpica/nseval.c b/drivers/acpi/acpica/nseval.c index 7bcc68f57afa..80670cb32b5a 100644 --- a/drivers/acpi/acpica/nseval.c +++ b/drivers/acpi/acpica/nseval.c @@ -59,15 +59,14 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, * * FUNCTION: acpi_ns_evaluate * - * PARAMETERS: info - Evaluation info block, contains: + * PARAMETERS: info - Evaluation info block, contains these fields + * and more: * prefix_node - Prefix or Method/Object Node to execute * relative_path - Name of method to execute, If NULL, the * Node is the object to execute * parameters - List of parameters to pass to the method, * terminated by NULL. Params itself may be * NULL if no parameters are being passed. - * return_object - Where to put method's return value (if - * any). If NULL, no value is returned. * parameter_type - Type of Parameter list * return_object - Where to put method's return value (if * any). If NULL, no value is returned. @@ -440,7 +439,7 @@ acpi_ns_exec_module_code(union acpi_operand_object *method_obj, /* Initialize the evaluation information block */ - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = parent_node; /* diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 4a85c4517988..b744a53618eb 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -90,7 +90,7 @@ acpi_status acpi_ns_initialize_objects(void) /* Set all init info to zero */ - ACPI_MEMSET(&info, 0, sizeof(struct acpi_init_walk_info)); + memset(&info, 0, sizeof(struct acpi_init_walk_info)); /* Walk entire namespace from the supplied root */ @@ -566,7 +566,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle, ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname (ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI)); - ACPI_MEMSET(info, 0, sizeof(struct acpi_evaluate_info)); + memset(info, 0, sizeof(struct acpi_evaluate_info)); info->prefix_node = device_node; info->relative_pathname = METHOD_NAME__INI; info->parameters = NULL; diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index c95a119767b5..57a4cfe547e4 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -117,6 +117,13 @@ acpi_ns_one_complete_parse(u32 pass_number, (u8) pass_number); } + /* Found OSDT table, enable the namespace override feature */ + + if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT) && + pass_number == ACPI_IMODE_LOAD_PASS1) { + walk_state->namespace_override = TRUE; + } + if (ACPI_FAILURE(status)) { acpi_ds_delete_walk_state(walk_state); goto cleanup; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index c30672d23878..0515a70f42a4 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -580,7 +580,7 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, * # is a hex digit. */ for (dest = new_string->string.pointer; *source; dest++, source++) { - *dest = (char)ACPI_TOUPPER(*source); + *dest = (char)toupper((int)*source); } acpi_ut_remove_reference(return_object); diff --git a/drivers/acpi/acpica/nssearch.c b/drivers/acpi/acpica/nssearch.c index 4a9d4a66016e..d73904013830 100644 --- a/drivers/acpi/acpica/nssearch.c +++ b/drivers/acpi/acpica/nssearch.c @@ -325,8 +325,41 @@ acpi_ns_search_and_enter(u32 target_name, * If we found it AND the request specifies that a find is an error, * return the error */ - if ((status == AE_OK) && (flags & ACPI_NS_ERROR_IF_FOUND)) { - status = AE_ALREADY_EXISTS; + if (status == AE_OK) { + + /* The node was found in the namespace */ + + /* + * If the namespace override feature is enabled for this node, + * delete any existing attached sub-object and make the node + * look like a new node that is owned by the override table. + */ + if (flags & ACPI_NS_OVERRIDE_IF_FOUND) { + ACPI_DEBUG_PRINT((ACPI_DB_NAMES, + "Namespace override: %4.4s pass %u type %X Owner %X\n", + ACPI_CAST_PTR(char, + &target_name), + interpreter_mode, + (*return_node)->type, + walk_state->owner_id)); + + acpi_ns_delete_children(*return_node); + if (acpi_gbl_runtime_namespace_override) { + acpi_ut_remove_reference((*return_node)->object); + (*return_node)->object = NULL; + (*return_node)->owner_id = + walk_state->owner_id; + } else { + acpi_ns_remove_node(*return_node); + *return_node = ACPI_ENTRY_NOT_FOUND; + } + } + + /* Return an error if we don't expect to find the object */ + + else if (flags & ACPI_NS_ERROR_IF_FOUND) { + status = AE_ALREADY_EXISTS; + } } #ifdef ACPI_ASL_COMPILER if (*return_node && (*return_node)->type == ACPI_TYPE_ANY) { diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c index 6ad02008c0c2..8d8104b8bd28 100644 --- a/drivers/acpi/acpica/nsutils.c +++ b/drivers/acpi/acpica/nsutils.c @@ -292,8 +292,7 @@ acpi_status acpi_ns_build_internal_name(struct acpi_namestring_info *info) } else { /* Convert the character to uppercase and save it */ - result[i] = - (char)ACPI_TOUPPER((int)*external_name); + result[i] = (char)toupper((int)*external_name); external_name++; } } diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index b6030a2deee1..6ee1e52b903d 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -696,7 +696,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - no_match = ACPI_STRCMP(hid->string, info->hid); + no_match = strcmp(hid->string, info->hid); ACPI_FREE(hid); if (no_match) { @@ -715,8 +715,7 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = FALSE; for (i = 0; i < cid->count; i++) { - if (ACPI_STRCMP(cid->ids[i].string, info->hid) - == 0) { + if (strcmp(cid->ids[i].string, info->hid) == 0) { /* Found a matching CID */ diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index d66c326485d8..9ff643b9553f 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -114,7 +114,7 @@ acpi_get_handle(acpi_handle parent, /* Special case for root-only, since we can't search for it */ - if (!ACPI_STRCMP(pathname, ACPI_NS_ROOT_PATH)) { + if (!strcmp(pathname, ACPI_NS_ROOT_PATH)) { *ret_handle = ACPI_CAST_PTR(acpi_handle, acpi_gbl_root_node); return (AE_OK); @@ -242,7 +242,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, /* Copy actual string and return a pointer to the next string area */ - ACPI_MEMCPY(string_area, source->string, source->length); + memcpy(string_area, source->string, source->length); return (string_area + source->length); } @@ -260,7 +260,7 @@ static char *acpi_ns_copy_device_id(struct acpi_pnp_device_id *dest, * control methods (Such as in the case of a device.) * * For Device and Processor objects, run the Device _HID, _UID, _CID, _SUB, - * _STA, _ADR, _sx_w, and _sx_d methods. + * _CLS, _STA, _ADR, _sx_w, and _sx_d methods. * * Note: Allocates the return buffer, must be freed by the caller. * @@ -276,11 +276,12 @@ acpi_get_object_info(acpi_handle handle, struct acpi_pnp_device_id *hid = NULL; struct acpi_pnp_device_id *uid = NULL; struct acpi_pnp_device_id *sub = NULL; + struct acpi_pnp_device_id *cls = NULL; char *next_id_string; acpi_object_type type; acpi_name name; u8 param_count = 0; - u8 valid = 0; + u16 valid = 0; u32 info_size; u32 i; acpi_status status; @@ -320,7 +321,7 @@ acpi_get_object_info(acpi_handle handle, if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* * Get extra info for ACPI Device/Processor objects only: - * Run the Device _HID, _UID, _SUB, and _CID methods. + * Run the Device _HID, _UID, _SUB, _CID, and _CLS methods. * * Note: none of these methods are required, so they may or may * not be present for this device. The Info->Valid bitfield is used @@ -363,6 +364,14 @@ acpi_get_object_info(acpi_handle handle, sizeof(struct acpi_pnp_device_id_list)); valid |= ACPI_VALID_CID; } + + /* Execute the Device._CLS method */ + + status = acpi_ut_execute_CLS(node, &cls); + if (ACPI_SUCCESS(status)) { + info_size += cls->length; + valid |= ACPI_VALID_CLS; + } } /* @@ -486,6 +495,11 @@ acpi_get_object_info(acpi_handle handle, } } + if (cls) { + next_id_string = acpi_ns_copy_device_id(&info->class_code, + cls, next_id_string); + } + /* Copy the fixed-length data */ info->info_size = info_size; @@ -510,6 +524,9 @@ cleanup: if (cid_list) { ACPI_FREE(cid_list); } + if (cls) { + ACPI_FREE(cls); + } return (status); } @@ -620,7 +637,7 @@ acpi_status acpi_install_method(u8 *buffer) /* Copy the method AML to the local buffer */ - ACPI_MEMCPY(aml_buffer, aml_start, aml_length); + memcpy(aml_buffer, aml_start, aml_length); /* Initialize the method object with the new method's information */ diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index 960505ab409a..32440912023a 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -93,10 +93,9 @@ void acpi_ps_init_op(union acpi_parse_object *op, u16 opcode) op->common.descriptor_type = ACPI_DESC_TYPE_PARSER; op->common.aml_opcode = opcode; - ACPI_DISASM_ONLY_MEMBERS(ACPI_STRNCPY(op->common.aml_op_name, - (acpi_ps_get_opcode_info - (opcode))->name, - sizeof(op->common.aml_op_name))); + ACPI_DISASM_ONLY_MEMBERS(strncpy(op->common.aml_op_name, + (acpi_ps_get_opcode_info(opcode))-> + name, sizeof(op->common.aml_op_name))); } /******************************************************************************* diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c index 15434e4c9b34..3fa829e96c2a 100644 --- a/drivers/acpi/acpica/rscreate.c +++ b/drivers/acpi/acpica/rscreate.c @@ -353,13 +353,13 @@ acpi_rs_create_pci_routing_table(union acpi_operand_object *package_object, /* +1 to include null terminator */ user_prt->length += - (u32) ACPI_STRLEN(user_prt->source) + 1; + (u32)strlen(user_prt->source) + 1; break; case ACPI_TYPE_STRING: - ACPI_STRCPY(user_prt->source, - obj_desc->string.pointer); + strcpy(user_prt->source, + obj_desc->string.pointer); /* * Add to the Length field the length of the string diff --git a/drivers/acpi/acpica/rsmisc.c b/drivers/acpi/acpica/rsmisc.c index 1fe49d223663..ac37852e0821 100644 --- a/drivers/acpi/acpica/rsmisc.c +++ b/drivers/acpi/acpica/rsmisc.c @@ -119,7 +119,7 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, /* * Get the resource type and the initial (minimum) length */ - ACPI_MEMSET(resource, 0, INIT_RESOURCE_LENGTH(info)); + memset(resource, 0, INIT_RESOURCE_LENGTH(info)); resource->type = INIT_RESOURCE_TYPE(info); resource->length = INIT_RESOURCE_LENGTH(info); break; @@ -324,13 +324,13 @@ acpi_rs_convert_aml_to_resource(struct acpi_resource *resource, case ACPI_RSC_SET8: - ACPI_MEMSET(destination, info->aml_offset, info->value); + memset(destination, info->aml_offset, info->value); break; case ACPI_RSC_DATA8: target = ACPI_ADD_PTR(char, resource, info->value); - ACPI_MEMCPY(destination, source, ACPI_GET16(target)); + memcpy(destination, source, ACPI_GET16(target)); break; case ACPI_RSC_ADDRESS: @@ -502,7 +502,7 @@ acpi_rs_convert_resource_to_aml(struct acpi_resource *resource, switch (info->opcode) { case ACPI_RSC_INITSET: - ACPI_MEMSET(aml, 0, INIT_RESOURCE_LENGTH(info)); + memset(aml, 0, INIT_RESOURCE_LENGTH(info)); aml_length = INIT_RESOURCE_LENGTH(info); acpi_rs_set_resource_header(INIT_RESOURCE_TYPE(info), aml_length, aml); diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index ece3cd60cc6a..52b024df0052 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -148,7 +148,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) case ACPI_RSC_MOVE_SERIAL_VEN: case ACPI_RSC_MOVE_SERIAL_RES: - ACPI_MEMCPY(destination, source, item_count); + memcpy(destination, source, item_count); return; /* @@ -364,12 +364,11 @@ acpi_rs_get_resource_source(acpi_rs_length resource_length, * Zero the entire area of the buffer. */ total_length = - (u32) - ACPI_STRLEN(ACPI_CAST_PTR(char, &aml_resource_source[1])) + + (u32)strlen(ACPI_CAST_PTR(char, &aml_resource_source[1])) + 1; - total_length = (u32) ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); + total_length = (u32)ACPI_ROUND_UP_TO_NATIVE_WORD(total_length); - ACPI_MEMSET(resource_source->string_ptr, 0, total_length); + memset(resource_source->string_ptr, 0, total_length); /* Copy the resource_source string to the destination */ @@ -432,8 +431,8 @@ acpi_rs_set_resource_source(union aml_resource * aml, /* Copy the resource_source string */ - ACPI_STRCPY(ACPI_CAST_PTR(char, &aml_resource_source[1]), - resource_source->string_ptr); + strcpy(ACPI_CAST_PTR(char, &aml_resource_source[1]), + resource_source->string_ptr); /* * Add the length of the string (+ 1 for null terminator) to the diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c index 8e6276df0226..de51f836ef68 100644 --- a/drivers/acpi/acpica/rsxface.c +++ b/drivers/acpi/acpica/rsxface.c @@ -398,8 +398,8 @@ acpi_resource_to_address64(struct acpi_resource *resource, /* Simple copy for 64 bit source */ - ACPI_MEMCPY(out, &resource->data, - sizeof(struct acpi_resource_address64)); + memcpy(out, &resource->data, + sizeof(struct acpi_resource_address64)); break; default: @@ -499,7 +499,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) */ if ((vendor->byte_length < (ACPI_UUID_LENGTH + 1)) || (vendor->uuid_subtype != info->uuid->subtype) || - (ACPI_MEMCMP(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { + (memcmp(vendor->uuid, info->uuid->data, ACPI_UUID_LENGTH))) { return (AE_OK); } @@ -513,7 +513,7 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context) /* Found the correct resource, copy and return it */ - ACPI_MEMCPY(buffer->pointer, resource, resource->length); + memcpy(buffer->pointer, resource, resource->length); buffer->length = resource->length; /* Found the desired descriptor, terminate resource walk */ diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c index d7f8386455bd..5c9d5abf1588 100644 --- a/drivers/acpi/acpica/tbdata.c +++ b/drivers/acpi/acpica/tbdata.c @@ -73,7 +73,7 @@ acpi_tb_init_table_descriptor(struct acpi_table_desc *table_desc, * Initialize the table descriptor. Set the pointer to NULL, since the * table is not fully mapped at this time. */ - ACPI_MEMSET(table_desc, 0, sizeof(struct acpi_table_desc)); + memset(table_desc, 0, sizeof(struct acpi_table_desc)); table_desc->address = address; table_desc->length = table->length; table_desc->flags = flags; @@ -465,9 +465,9 @@ acpi_status acpi_tb_resize_root_table_list(void) /* Copy and free the previous table array */ if (acpi_gbl_root_table_list.tables) { - ACPI_MEMCPY(tables, acpi_gbl_root_table_list.tables, - (acpi_size) table_count * - sizeof(struct acpi_table_desc)); + memcpy(tables, acpi_gbl_root_table_list.tables, + (acpi_size) table_count * + sizeof(struct acpi_table_desc)); if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) { ACPI_FREE(acpi_gbl_root_table_list.tables); diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 7d2486005e3f..6253001b6375 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -350,9 +350,18 @@ void acpi_tb_parse_fadt(u32 table_index) /* If Hardware Reduced flag is set, there is no FACS */ if (!acpi_gbl_reduced_hardware) { - acpi_tb_install_fixed_table((acpi_physical_address) - acpi_gbl_FADT.Xfacs, ACPI_SIG_FACS, - ACPI_TABLE_INDEX_FACS); + if (acpi_gbl_FADT.facs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.facs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_FACS); + } + if (acpi_gbl_FADT.Xfacs) { + acpi_tb_install_fixed_table((acpi_physical_address) + acpi_gbl_FADT.Xfacs, + ACPI_SIG_FACS, + ACPI_TABLE_INDEX_X_FACS); + } } } @@ -389,12 +398,12 @@ void acpi_tb_create_local_fadt(struct acpi_table_header *table, u32 length) /* Clear the entire local FADT */ - ACPI_MEMSET(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); + memset(&acpi_gbl_FADT, 0, sizeof(struct acpi_table_fadt)); /* Copy the original FADT, up to sizeof (struct acpi_table_fadt) */ - ACPI_MEMCPY(&acpi_gbl_FADT, table, - ACPI_MIN(length, sizeof(struct acpi_table_fadt))); + memcpy(&acpi_gbl_FADT, table, + ACPI_MIN(length, sizeof(struct acpi_table_fadt))); /* Take a copy of the Hardware Reduced flag */ @@ -491,13 +500,9 @@ static void acpi_tb_convert_fadt(void) acpi_gbl_FADT.header.length = sizeof(struct acpi_table_fadt); /* - * Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary. + * Expand the 32-bit DSDT addresses to 64-bit as necessary. * Later ACPICA code will always use the X 64-bit field. */ - acpi_gbl_FADT.Xfacs = acpi_tb_select_address("FACS", - acpi_gbl_FADT.facs, - acpi_gbl_FADT.Xfacs); - acpi_gbl_FADT.Xdsdt = acpi_tb_select_address("DSDT", acpi_gbl_FADT.dsdt, acpi_gbl_FADT.Xdsdt); diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c index 0b879fcfef67..119c84ad9833 100644 --- a/drivers/acpi/acpica/tbfind.c +++ b/drivers/acpi/acpica/tbfind.c @@ -76,16 +76,16 @@ acpi_tb_find_table(char *signature, /* Normalize the input strings */ - ACPI_MEMSET(&header, 0, sizeof(struct acpi_table_header)); + memset(&header, 0, sizeof(struct acpi_table_header)); ACPI_MOVE_NAME(header.signature, signature); - ACPI_STRNCPY(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); - ACPI_STRNCPY(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); + strncpy(header.oem_id, oem_id, ACPI_OEM_ID_SIZE); + strncpy(header.oem_table_id, oem_table_id, ACPI_OEM_TABLE_ID_SIZE); /* Search for the table */ for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if (ACPI_MEMCMP(&(acpi_gbl_root_table_list.tables[i].signature), - header.signature, ACPI_NAME_SIZE)) { + if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature), + header.signature, ACPI_NAME_SIZE)) { /* Not the requested table */ @@ -112,21 +112,20 @@ acpi_tb_find_table(char *signature, /* Check for table match on all IDs */ - if (!ACPI_MEMCMP + if (!memcmp (acpi_gbl_root_table_list.tables[i].pointer->signature, header.signature, ACPI_NAME_SIZE) && (!oem_id[0] || - !ACPI_MEMCMP + !memcmp (acpi_gbl_root_table_list. tables[i].pointer-> oem_id, header.oem_id, ACPI_OEM_ID_SIZE)) && (!oem_table_id[0] - || !ACPI_MEMCMP(acpi_gbl_root_table_list.tables[i]. - pointer->oem_table_id, - header.oem_table_id, - ACPI_OEM_TABLE_ID_SIZE))) { + || !memcmp(acpi_gbl_root_table_list.tables[i].pointer-> + oem_table_id, header.oem_table_id, + ACPI_OEM_TABLE_ID_SIZE))) { *table_index = i; ACPI_DEBUG_PRINT((ACPI_DB_TABLES, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 008a251780f4..15ea98e0068d 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -87,8 +87,8 @@ acpi_tb_compare_tables(struct acpi_table_desc *table_desc, u32 table_index) * not just the header. */ is_identical = (u8)((table_desc->length != table_length || - ACPI_MEMCMP(table_desc->pointer, table, - table_length)) ? FALSE : TRUE); + memcmp(table_desc->pointer, table, table_length)) ? + FALSE : TRUE); /* Release the acquired table */ @@ -289,8 +289,7 @@ acpi_tb_install_standard_table(acpi_physical_address address, if ((new_table_desc.signature.ascii[0] != 0x00) && (!ACPI_COMPARE_NAME (&new_table_desc.signature, ACPI_SIG_SSDT)) - && (ACPI_STRNCMP(new_table_desc.signature.ascii, "OEM", 3))) - { + && (strncmp(new_table_desc.signature.ascii, "OEM", 3))) { ACPI_BIOS_ERROR((AE_INFO, "Table has invalid signature [%4.4s] (0x%8.8X), " "must be SSDT or OEMx", diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 77ba5c71c6e7..709d5112fc16 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -73,7 +73,7 @@ static void acpi_tb_fix_string(char *string, acpi_size length) { while (length && *string) { - if (!ACPI_IS_PRINT(*string)) { + if (!isprint((int)*string)) { *string = '?'; } string++; @@ -100,7 +100,7 @@ acpi_tb_cleanup_table_header(struct acpi_table_header *out_header, struct acpi_table_header *header) { - ACPI_MEMCPY(out_header, header, sizeof(struct acpi_table_header)); + memcpy(out_header, header, sizeof(struct acpi_table_header)); acpi_tb_fix_string(out_header->signature, ACPI_NAME_SIZE); acpi_tb_fix_string(out_header->oem_id, ACPI_OEM_ID_SIZE); @@ -138,9 +138,9 @@ acpi_tb_print_table_header(acpi_physical_address address, /* RSDP has no common fields */ - ACPI_MEMCPY(local_header.oem_id, - ACPI_CAST_PTR(struct acpi_table_rsdp, - header)->oem_id, ACPI_OEM_ID_SIZE); + memcpy(local_header.oem_id, + ACPI_CAST_PTR(struct acpi_table_rsdp, header)->oem_id, + ACPI_OEM_ID_SIZE); acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %06X (v%.2d %-6.6s)", diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 6559a58439c5..568ac0e4a3c6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -68,7 +68,6 @@ acpi_tb_get_root_table_entry(u8 *table_entry, u32 table_entry_size); acpi_status acpi_tb_initialize_facs(void) { - acpi_status status; /* If Hardware Reduced flag is set, there is no FACS */ @@ -77,11 +76,25 @@ acpi_status acpi_tb_initialize_facs(void) return (AE_OK); } - status = acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, - ACPI_CAST_INDIRECT_PTR(struct - acpi_table_header, - &acpi_gbl_FACS)); - return (status); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &acpi_gbl_facs32)); + (void)acpi_get_table_by_index(ACPI_TABLE_INDEX_X_FACS, + ACPI_CAST_INDIRECT_PTR(struct + acpi_table_header, + &acpi_gbl_facs64)); + + if (acpi_gbl_facs64 + && (!acpi_gbl_facs32 || !acpi_gbl_use32_bit_facs_addresses)) { + acpi_gbl_FACS = acpi_gbl_facs64; + } else if (acpi_gbl_facs32) { + acpi_gbl_FACS = acpi_gbl_facs32; + } + + /* If there is no FACS, just continue. There was already an error msg */ + + return (AE_OK); } #endif /* !ACPI_REDUCED_HARDWARE */ @@ -101,7 +114,7 @@ acpi_status acpi_tb_initialize_facs(void) u8 acpi_tb_tables_loaded(void) { - if (acpi_gbl_root_table_list.current_table_count >= 3) { + if (acpi_gbl_root_table_list.current_table_count >= 4) { return (TRUE); } @@ -175,7 +188,7 @@ struct acpi_table_header *acpi_tb_copy_dsdt(u32 table_index) return (NULL); } - ACPI_MEMCPY(new_table, table_desc->pointer, table_desc->length); + memcpy(new_table, table_desc->pointer, table_desc->length); acpi_tb_uninstall_table(table_desc); acpi_tb_init_table_descriptor(&acpi_gbl_root_table_list. @@ -357,11 +370,11 @@ acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address) table_entry = ACPI_ADD_PTR(u8, table, sizeof(struct acpi_table_header)); /* - * First two entries in the table array are reserved for the DSDT - * and FACS, which are not actually present in the RSDT/XSDT - they - * come from the FADT + * First three entries in the table array are reserved for the DSDT + * and 32bit/64bit FACS, which are not actually present in the + * RSDT/XSDT - they come from the FADT */ - acpi_gbl_root_table_list.current_table_count = 2; + acpi_gbl_root_table_list.current_table_count = 3; /* Initialize the root table array from the RSDT/XSDT */ diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index 60e94f87f27a..5559e2c70b15 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -119,9 +119,9 @@ acpi_initialize_tables(struct acpi_table_desc * initial_table_array, } else { /* Root Table Array has been statically allocated by the host */ - ACPI_MEMSET(initial_table_array, 0, - (acpi_size) initial_table_count * - sizeof(struct acpi_table_desc)); + memset(initial_table_array, 0, + (acpi_size) initial_table_count * + sizeof(struct acpi_table_desc)); acpi_gbl_root_table_list.tables = initial_table_array; acpi_gbl_root_table_list.max_table_count = initial_table_count; @@ -242,8 +242,9 @@ acpi_get_table_header(char *signature, if (!header) { return (AE_NO_MEMORY); } - ACPI_MEMCPY(out_table_header, header, - sizeof(struct acpi_table_header)); + + memcpy(out_table_header, header, + sizeof(struct acpi_table_header)); acpi_os_unmap_memory(header, sizeof(struct acpi_table_header)); @@ -251,9 +252,9 @@ acpi_get_table_header(char *signature, return (AE_NOT_FOUND); } } else { - ACPI_MEMCPY(out_table_header, - acpi_gbl_root_table_list.tables[i].pointer, - sizeof(struct acpi_table_header)); + memcpy(out_table_header, + acpi_gbl_root_table_list.tables[i].pointer, + sizeof(struct acpi_table_header)); } return (AE_OK); } diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c index aadb3002a2dd..9682d40ca6ff 100644 --- a/drivers/acpi/acpica/tbxfload.c +++ b/drivers/acpi/acpica/tbxfload.c @@ -150,8 +150,8 @@ static acpi_status acpi_tb_load_namespace(void) * Save the original DSDT header for detection of table corruption * and/or replacement of the DSDT from outside the OS. */ - ACPI_MEMCPY(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, - sizeof(struct acpi_table_header)); + memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT, + sizeof(struct acpi_table_header)); (void)acpi_ut_release_mutex(ACPI_MTX_TABLES); @@ -166,13 +166,18 @@ static acpi_status acpi_tb_load_namespace(void) (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) { - if ((!ACPI_COMPARE_NAME + if (!acpi_gbl_root_table_list.tables[i].address || + (!ACPI_COMPARE_NAME (&(acpi_gbl_root_table_list.tables[i].signature), ACPI_SIG_SSDT) && !ACPI_COMPARE_NAME(& (acpi_gbl_root_table_list.tables[i]. - signature), ACPI_SIG_PSDT)) + signature), ACPI_SIG_PSDT) + && + !ACPI_COMPARE_NAME(& + (acpi_gbl_root_table_list.tables[i]. + signature), ACPI_SIG_OSDT)) || ACPI_FAILURE(acpi_tb_validate_table (&acpi_gbl_root_table_list.tables[i]))) { @@ -219,9 +224,9 @@ acpi_install_table(acpi_physical_address address, u8 physical) ACPI_FUNCTION_TRACE(acpi_install_table); if (physical) { - flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; - } else { flags = ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL; + } else { + flags = ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL; } status = acpi_tb_install_standard_table(address, flags, diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c index 61d8f6d186d1..7a4101f0685e 100644 --- a/drivers/acpi/acpica/utalloc.c +++ b/drivers/acpi/acpica/utalloc.c @@ -73,7 +73,7 @@ void *acpi_os_allocate_zeroed(acpi_size size) /* Clear the memory block */ - ACPI_MEMSET(allocation, 0, size); + memset(allocation, 0, size); } return (allocation); @@ -181,7 +181,7 @@ acpi_status acpi_ut_delete_caches(void) char buffer[7]; if (acpi_gbl_display_final_mem_stats) { - ACPI_STRCPY(buffer, "MEMORY"); + strcpy(buffer, "MEMORY"); (void)acpi_db_display_statistics(buffer); } #endif @@ -337,6 +337,6 @@ acpi_ut_initialize_buffer(struct acpi_buffer * buffer, /* Have a valid buffer, clear it */ - ACPI_MEMSET(buffer->pointer, 0, required_length); + memset(buffer->pointer, 0, required_length); return (AE_OK); } diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c index a8c39643e618..01c8709ca586 100644 --- a/drivers/acpi/acpica/utbuffer.c +++ b/drivers/acpi/acpica/utbuffer.c @@ -159,7 +159,7 @@ void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 base_offset) } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_os_printf("%c", buf_char); } else { acpi_os_printf("."); @@ -319,7 +319,7 @@ acpi_ut_dump_buffer_to_file(ACPI_FILE file, } buf_char = buffer[(acpi_size) i + j]; - if (ACPI_IS_PRINT(buf_char)) { + if (isprint(buf_char)) { acpi_ut_file_printf(file, "%c", buf_char); } else { acpi_ut_file_printf(file, "."); diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c index eacc5eee362e..0d21fbd99363 100644 --- a/drivers/acpi/acpica/utcache.c +++ b/drivers/acpi/acpica/utcache.c @@ -84,7 +84,7 @@ acpi_os_create_cache(char *cache_name, /* Populate the cache object and return it */ - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = cache_name; cache->object_size = object_size; cache->max_depth = max_depth; @@ -212,7 +212,7 @@ acpi_os_release_object(struct acpi_memory_list * cache, void *object) /* Mark the object as cached */ - ACPI_MEMSET(object, 0xCA, cache->object_size); + memset(object, 0xCA, cache->object_size); ACPI_SET_DESCRIPTOR_TYPE(object, ACPI_DESC_TYPE_CACHED); /* Put the object at the head of the cache list */ @@ -281,7 +281,7 @@ void *acpi_os_acquire_object(struct acpi_memory_list *cache) /* Clear (zero) the previously used Object */ - ACPI_MEMSET(object, 0, cache->object_size); + memset(object, 0, cache->object_size); } else { /* The cache is empty, create a new object */ diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c index c37ec5035f4c..257221d452c8 100644 --- a/drivers/acpi/acpica/utcopy.c +++ b/drivers/acpi/acpica/utcopy.c @@ -129,7 +129,7 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, /* Always clear the external object */ - ACPI_MEMSET(external_object, 0, sizeof(union acpi_object)); + memset(external_object, 0, sizeof(union acpi_object)); /* * In general, the external object will be the same type as @@ -149,9 +149,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, string. length + 1); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->string.pointer, - (acpi_size) internal_object->string.length + 1); + memcpy((void *)data_space, + (void *)internal_object->string.pointer, + (acpi_size) internal_object->string.length + 1); break; case ACPI_TYPE_BUFFER: @@ -162,9 +162,9 @@ acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object, ACPI_ROUND_UP_TO_NATIVE_WORD(internal_object->string. length); - ACPI_MEMCPY((void *)data_space, - (void *)internal_object->buffer.pointer, - internal_object->buffer.length); + memcpy((void *)data_space, + (void *)internal_object->buffer.pointer, + internal_object->buffer.length); break; case ACPI_TYPE_INTEGER: @@ -502,9 +502,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->string.pointer, - external_object->string.pointer, - external_object->string.length); + memcpy(internal_object->string.pointer, + external_object->string.pointer, + external_object->string.length); internal_object->string.length = external_object->string.length; break; @@ -517,9 +517,9 @@ acpi_ut_copy_esimple_to_isimple(union acpi_object *external_object, goto error_exit; } - ACPI_MEMCPY(internal_object->buffer.pointer, - external_object->buffer.pointer, - external_object->buffer.length); + memcpy(internal_object->buffer.pointer, + external_object->buffer.pointer, + external_object->buffer.length); internal_object->buffer.length = external_object->buffer.length; @@ -694,8 +694,8 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, copy_size = sizeof(struct acpi_namespace_node); } - ACPI_MEMCPY(ACPI_CAST_PTR(char, dest_desc), - ACPI_CAST_PTR(char, source_desc), copy_size); + memcpy(ACPI_CAST_PTR(char, dest_desc), + ACPI_CAST_PTR(char, source_desc), copy_size); /* Restore the saved fields */ @@ -725,9 +725,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual buffer data */ - ACPI_MEMCPY(dest_desc->buffer.pointer, - source_desc->buffer.pointer, - source_desc->buffer.length); + memcpy(dest_desc->buffer.pointer, + source_desc->buffer.pointer, + source_desc->buffer.length); } break; @@ -747,9 +747,9 @@ acpi_ut_copy_simple_object(union acpi_operand_object *source_desc, /* Copy the actual string data */ - ACPI_MEMCPY(dest_desc->string.pointer, - source_desc->string.pointer, - (acpi_size) source_desc->string.length + 1); + memcpy(dest_desc->string.pointer, + source_desc->string.pointer, + (acpi_size) source_desc->string.length + 1); } break; diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index 4f3f888d33bb..cd02693841db 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -111,8 +111,8 @@ void acpi_ut_track_stack_ptr(void) * RETURN: Updated pointer to the function name * * DESCRIPTION: Remove the "Acpi" prefix from the function name, if present. - * This allows compiler macros such as __func__ to be used with no - * change to the debug output. + * This allows compiler macros such as __func__ to be used + * with no change to the debug output. * ******************************************************************************/ diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 5e8df9177da4..a72685c1e819 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -102,12 +102,19 @@ const struct acpi_predefined_names acpi_gbl_pre_defined_names[] = { {"_SB_", ACPI_TYPE_DEVICE, NULL}, {"_SI_", ACPI_TYPE_LOCAL_SCOPE, NULL}, {"_TZ_", ACPI_TYPE_DEVICE, NULL}, - {"_REV", ACPI_TYPE_INTEGER, (char *)ACPI_CA_SUPPORT_LEVEL}, + /* + * March, 2015: + * The _REV object is in the process of being deprecated, because + * other ACPI implementations permanently return 2. Thus, it + * has little or no value. Return 2 for compatibility with + * other ACPI implementations. + */ + {"_REV", ACPI_TYPE_INTEGER, ACPI_CAST_PTR(char, 2)}, {"_OS_", ACPI_TYPE_STRING, ACPI_OS_NAME}, - {"_GL_", ACPI_TYPE_MUTEX, (char *)1}, + {"_GL_", ACPI_TYPE_MUTEX, ACPI_CAST_PTR(char, 1)}, #if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY) - {"_OSI", ACPI_TYPE_METHOD, (char *)1}, + {"_OSI", ACPI_TYPE_METHOD, ACPI_CAST_PTR(char, 1)}, #endif /* Table terminator */ diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c index 27431cfc1c44..7956df1e263c 100644 --- a/drivers/acpi/acpica/utids.c +++ b/drivers/acpi/acpica/utids.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Module Name: utids - support for device Ids - HID, UID, CID + * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS * *****************************************************************************/ @@ -111,7 +111,7 @@ acpi_ut_execute_HID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(hid->string, obj_desc->string.pointer); + strcpy(hid->string, obj_desc->string.pointer); } hid->length = length; @@ -180,7 +180,7 @@ acpi_ut_execute_SUB(struct acpi_namespace_node *device_node, /* Simply copy existing string */ - ACPI_STRCPY(sub->string, obj_desc->string.pointer); + strcpy(sub->string, obj_desc->string.pointer); sub->length = length; *return_id = sub; @@ -256,7 +256,7 @@ acpi_ut_execute_UID(struct acpi_namespace_node *device_node, if (obj_desc->common.type == ACPI_TYPE_INTEGER) { acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); } else { - ACPI_STRCPY(uid->string, obj_desc->string.pointer); + strcpy(uid->string, obj_desc->string.pointer); } uid->length = length; @@ -393,8 +393,7 @@ acpi_ut_execute_CID(struct acpi_namespace_node *device_node, /* Copy the String CID from the returned object */ - ACPI_STRCPY(next_id_string, - cid_objects[i]->string.pointer); + strcpy(next_id_string, cid_objects[i]->string.pointer); length = cid_objects[i]->string.length + 1; } @@ -416,3 +415,92 @@ cleanup: acpi_ut_remove_reference(obj_desc); return_ACPI_STATUS(status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CLS + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the _CLS is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _CLS control method that returns PCI-defined + * class code of the device. The _CLS value is always a package + * containing PCI class information as a list of integers. + * The returned string has format "BBSSPP", where: + * BB = Base-class code + * SS = Sub-class code + * PP = Programming Interface code + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CLS(struct acpi_namespace_node *device_node, + struct acpi_pnp_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + union acpi_operand_object **cls_objects; + u32 count; + struct acpi_pnp_device_id *cls; + u32 length; + acpi_status status; + u8 class_code[3] = { 0, 0, 0 }; + + ACPI_FUNCTION_TRACE(ut_execute_CLS); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CLS, + ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + length = ACPI_PCICLS_STRING_SIZE; + cls_objects = obj_desc->package.elements; + count = obj_desc->package.count; + + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + if (count > 0 + && cls_objects[0]->common.type == ACPI_TYPE_INTEGER) { + class_code[0] = (u8)cls_objects[0]->integer.value; + } + if (count > 1 + && cls_objects[1]->common.type == ACPI_TYPE_INTEGER) { + class_code[1] = (u8)cls_objects[1]->integer.value; + } + if (count > 2 + && cls_objects[2]->common.type == ACPI_TYPE_INTEGER) { + class_code[2] = (u8)cls_objects[2]->integer.value; + } + } + + /* Allocate a buffer for the CLS */ + + cls = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) + + (acpi_size) length); + if (!cls) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after PNP_DEVICE_ID struct */ + + cls->string = + ACPI_ADD_PTR(char, cls, sizeof(struct acpi_pnp_device_id)); + + /* Simply copy existing string */ + + acpi_ex_pci_cls_to_string(cls->string, class_code); + cls->length = length; + *return_id = cls; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index cbb7034d28d8..71b66537f826 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -66,9 +66,9 @@ u8 acpi_ut_is_pci_root_bridge(char *id) * Check if this is a PCI root bridge. * ACPI 3.0+: check for a PCI Express root also. */ - if (!(ACPI_STRCMP(id, - PCI_ROOT_HID_STRING)) || - !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + if (!(strcmp(id, + PCI_ROOT_HID_STRING)) || + !(strcmp(id, PCI_EXPRESS_ROOT_HID_STRING))) { return (TRUE); } @@ -97,7 +97,8 @@ u8 acpi_ut_is_aml_table(struct acpi_table_header *table) if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) || ACPI_COMPARE_NAME(table->signature, ACPI_SIG_PSDT) || - ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) { + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT) || + ACPI_COMPARE_NAME(table->signature, ACPI_SIG_OSDT)) { return (TRUE); } diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c index 44035abdbf29..8f3d203aed79 100644 --- a/drivers/acpi/acpica/utosi.c +++ b/drivers/acpi/acpica/utosi.c @@ -232,8 +232,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) return (AE_NO_MEMORY); } - interface_info->name = - ACPI_ALLOCATE_ZEROED(ACPI_STRLEN(interface_name) + 1); + interface_info->name = ACPI_ALLOCATE_ZEROED(strlen(interface_name) + 1); if (!interface_info->name) { ACPI_FREE(interface_info); return (AE_NO_MEMORY); @@ -241,7 +240,7 @@ acpi_status acpi_ut_install_interface(acpi_string interface_name) /* Initialize new info and insert at the head of the global list */ - ACPI_STRCPY(interface_info->name, interface_name); + strcpy(interface_info->name, interface_name); interface_info->flags = ACPI_OSI_DYNAMIC; interface_info->next = acpi_gbl_supported_interfaces; @@ -269,7 +268,7 @@ acpi_status acpi_ut_remove_interface(acpi_string interface_name) previous_interface = next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { /* Found: name is in either the static list or was added at runtime */ @@ -373,7 +372,7 @@ struct acpi_interface_info *acpi_ut_get_interface(acpi_string interface_name) next_interface = acpi_gbl_supported_interfaces; while (next_interface) { - if (!ACPI_STRCMP(interface_name, next_interface->name)) { + if (!strcmp(interface_name, next_interface->name)) { return (next_interface); } diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c index 29e449935a82..97898ed71b4b 100644 --- a/drivers/acpi/acpica/utpredef.c +++ b/drivers/acpi/acpica/utpredef.c @@ -148,7 +148,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) u32 j; if (!expected_btypes) { - ACPI_STRCPY(buffer, "NONE"); + strcpy(buffer, "NONE"); return; } @@ -161,7 +161,7 @@ void acpi_ut_get_expected_return_types(char *buffer, u32 expected_btypes) /* If one of the expected types, concatenate the name of this type */ if (expected_btypes & this_rtype) { - ACPI_STRCAT(buffer, &ut_rtype_names[i][j]); + strcat(buffer, &ut_rtype_names[i][j]); j = 0; /* Use name separator from now on */ } diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c index 2be6bd4bdc09..b26297c5de49 100644 --- a/drivers/acpi/acpica/utprint.c +++ b/drivers/acpi/acpica/utprint.c @@ -180,7 +180,7 @@ const char *acpi_ut_scan_number(const char *string, u64 *number_ptr) { u64 number = 0; - while (ACPI_IS_DIGIT(*string)) { + while (isdigit((int)*string)) { number *= 10; number += *(string++) - '0'; } @@ -405,7 +405,7 @@ acpi_ut_vsnprintf(char *string, /* Process width */ width = -1; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); width = (s32) number; } else if (*format == '*') { @@ -422,7 +422,7 @@ acpi_ut_vsnprintf(char *string, precision = -1; if (*format == '.') { ++format; - if (ACPI_IS_DIGIT(*format)) { + if (isdigit((int)*format)) { format = acpi_ut_scan_number(format, &number); precision = (s32) number; } else if (*format == '*') { diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index 83b6c52490dc..8f3c883dfe0e 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -79,7 +79,7 @@ void acpi_ut_strlwr(char *src_string) /* Walk entire string, lowercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOLOWER(*string); + *string = (char)tolower((int)*string); } return; @@ -145,7 +145,7 @@ void acpi_ut_strupr(char *src_string) /* Walk entire string, uppercasing the letters */ for (string = src_string; *string; string++) { - *string = (char)ACPI_TOUPPER(*string); + *string = (char)toupper((int)*string); } return; @@ -202,7 +202,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Skip over any white space in the buffer */ - while ((*string) && (ACPI_IS_SPACE(*string) || *string == '\t')) { + while ((*string) && (isspace((int)*string) || *string == '\t')) { string++; } @@ -211,7 +211,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'. * We need to determine if it is decimal or hexadecimal. */ - if ((*string == '0') && (ACPI_TOLOWER(*(string + 1)) == 'x')) { + if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) { sign_of0x = 1; base = 16; @@ -224,7 +224,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Any string left? Check that '0x' is not followed by white space. */ - if (!(*string) || ACPI_IS_SPACE(*string) || *string == '\t') { + if (!(*string) || isspace((int)*string) || *string == '\t') { if (to_integer_op) { goto error_exit; } else { @@ -241,7 +241,7 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) /* Main loop: convert the string to a 32- or 64-bit integer */ while (*string) { - if (ACPI_IS_DIGIT(*string)) { + if (isdigit((int)*string)) { /* Convert ASCII 0-9 to Decimal value */ @@ -252,8 +252,8 @@ acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer) term = 1; } else { - this_digit = (u8)ACPI_TOUPPER(*string); - if (ACPI_IS_XDIGIT((char)this_digit)) { + this_digit = (u8)toupper((int)*string); + if (isxdigit((int)this_digit)) { /* Convert ASCII Hex char to value */ @@ -404,7 +404,7 @@ void acpi_ut_print_string(char *string, u16 max_length) /* Check for printable character or hex escape */ - if (ACPI_IS_PRINT(string[i])) { + if (isprint((int)string[i])) { /* This is a normal character */ acpi_os_printf("%c", (int)string[i]); @@ -609,22 +609,22 @@ void ut_convert_backslashes(char *pathname) u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source) { - if (ACPI_STRLEN(source) >= dest_size) { + if (strlen(source) >= dest_size) { return (TRUE); } - ACPI_STRCPY(dest, source); + strcpy(dest, source); return (FALSE); } u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source) { - if ((ACPI_STRLEN(dest) + ACPI_STRLEN(source)) >= dest_size) { + if ((strlen(dest) + strlen(source)) >= dest_size) { return (TRUE); } - ACPI_STRCAT(dest, source); + strcat(dest, source); return (FALSE); } @@ -635,14 +635,13 @@ acpi_ut_safe_strncat(char *dest, { acpi_size actual_transfer_length; - actual_transfer_length = - ACPI_MIN(max_transfer_length, ACPI_STRLEN(source)); + actual_transfer_length = ACPI_MIN(max_transfer_length, strlen(source)); - if ((ACPI_STRLEN(dest) + actual_transfer_length) >= dest_size) { + if ((strlen(dest) + actual_transfer_length) >= dest_size) { return (TRUE); } - ACPI_STRNCAT(dest, source, max_transfer_length); + strncat(dest, source, max_transfer_length); return (FALSE); } #endif diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index 130dd9f96f0f..9a7dc8196a5d 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -100,7 +100,7 @@ acpi_ut_create_list(char *list_name, return (AE_NO_MEMORY); } - ACPI_MEMSET(cache, 0, sizeof(struct acpi_memory_list)); + memset(cache, 0, sizeof(struct acpi_memory_list)); cache->list_name = list_name; cache->object_size = object_size; @@ -402,7 +402,7 @@ acpi_ut_track_allocation(struct acpi_debug_mem_block *allocation, allocation->component = component; allocation->line = line; - ACPI_STRNCPY(allocation->module, module, ACPI_MAX_MODULE_NAME); + strncpy(allocation->module, module, ACPI_MAX_MODULE_NAME); allocation->module[ACPI_MAX_MODULE_NAME - 1] = 0; if (!element) { @@ -497,7 +497,7 @@ acpi_ut_remove_allocation(struct acpi_debug_mem_block *allocation, /* Mark the segment as deleted */ - ACPI_MEMSET(&allocation->user_space, 0xEA, allocation->size); + memset(&allocation->user_space, 0xEA, allocation->size); status = acpi_ut_release_mutex(ACPI_MTX_MEMORY); return (status); @@ -595,7 +595,7 @@ void acpi_ut_dump_allocations(u32 component, const char *module) while (element) { if ((element->component & component) && ((module == NULL) - || (0 == ACPI_STRCMP(module, element->module)))) { + || (0 == strcmp(module, element->module)))) { descriptor = ACPI_CAST_PTR(union acpi_descriptor, &element->user_space); diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c index 0929187bdce0..51cf52d52243 100644 --- a/drivers/acpi/acpica/utxface.c +++ b/drivers/acpi/acpica/utxface.c @@ -234,8 +234,8 @@ acpi_status acpi_get_statistics(struct acpi_statistics *stats) stats->sci_count = acpi_sci_count; stats->gpe_count = acpi_gpe_count; - ACPI_MEMCPY(stats->fixed_event_count, acpi_fixed_event_count, - sizeof(acpi_fixed_event_count)); + memcpy(stats->fixed_event_count, acpi_fixed_event_count, + sizeof(acpi_fixed_event_count)); /* Other counters */ @@ -322,7 +322,7 @@ acpi_status acpi_install_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } @@ -374,7 +374,7 @@ acpi_status acpi_remove_interface(acpi_string interface_name) /* Parameter validation */ - if (!interface_name || (ACPI_STRLEN(interface_name) == 0)) { + if (!interface_name || (strlen(interface_name) == 0)) { return (AE_BAD_PARAMETER); } diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c index 083a76891889..42a32a66ef22 100644 --- a/drivers/acpi/acpica/utxfinit.c +++ b/drivers/acpi/acpica/utxfinit.c @@ -179,10 +179,12 @@ acpi_status __init acpi_enable_subsystem(u32 flags) * Obtain a permanent mapping for the FACS. This is required for the * Global Lock and the Firmware Waking Vector */ - status = acpi_tb_initialize_facs(); - if (ACPI_FAILURE(status)) { - ACPI_WARNING((AE_INFO, "Could not map the FACS table")); - return_ACPI_STATUS(status); + if (!(flags & ACPI_NO_FACS_INIT)) { + status = acpi_tb_initialize_facs(); + if (ACPI_FAILURE(status)) { + ACPI_WARNING((AE_INFO, "Could not map the FACS table")); + return_ACPI_STATUS(status); + } } #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 1d1791935c31..278dc4be992a 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -162,6 +162,15 @@ static int __init dmi_disable_osi_win8(const struct dmi_system_id *d) acpi_osi_setup("!Windows 2012"); return 0; } +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static int __init dmi_enable_rev_override(const struct dmi_system_id *d) +{ + printk(KERN_NOTICE PREFIX "DMI detected: %s (force ACPI _REV to 5)\n", + d->ident); + acpi_rev_override_setup(NULL); + return 0; +} +#endif static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { { @@ -325,6 +334,23 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"), }, }, + +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE + /* + * DELL XPS 13 (2015) switches sound between HDA and I2S + * depending on the ACPI _REV callback. If userspace supports + * I2S sufficiently (or if you do not care about sound), you + * can safely disable this quirk. + */ + { + .callback = dmi_enable_rev_override, + .ident = "DELL XPS 13 (2015)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343"), + }, + }, +#endif {} }; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 787c629bc9b4..4683a96932b9 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -58,6 +58,7 @@ void acpi_cmos_rtc_init(void); #else static inline void acpi_cmos_rtc_init(void) {} #endif +int acpi_rev_override_setup(char *str); extern bool acpi_force_hot_remove; diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2161fa178c8d..628a42c41ab1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,7 @@ #include <linux/list.h> #include <linux/acpi.h> #include <linux/sort.h> +#include <linux/pmem.h> #include <linux/io.h> #include "nfit.h" @@ -305,6 +306,23 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_flush_address *flush) +{ + struct device *dev = acpi_desc->dev; + struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), + GFP_KERNEL); + + if (!nfit_flush) + return false; + INIT_LIST_HEAD(&nfit_flush->list); + nfit_flush->flush = flush; + list_add_tail(&nfit_flush->list, &acpi_desc->flushes); + dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + flush->device_handle, flush->hint_count); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, const void *end) { @@ -338,7 +356,8 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - dev_dbg(dev, "%s: flush\n", __func__); + if (!add_flush(acpi_desc, table)) + return err; break; case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); @@ -389,6 +408,7 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; + struct nfit_flush *nfit_flush; struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; @@ -442,6 +462,14 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_bdw = nfit_idt->idt; break; } + + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { + if (nfit_flush->flush->device_handle != + nfit_memdev->memdev->device_handle) + continue; + nfit_mem->nfit_flush = nfit_flush; + break; + } break; } @@ -978,6 +1006,24 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) return mmio->base_offset + line_offset + table_offset + sub_line_offset; } +static void wmb_blk(struct nfit_blk *nfit_blk) +{ + + if (nfit_blk->nvdimm_flush) { + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + writeq(1, nfit_blk->nvdimm_flush); + wmb(); + } else + wmb_pmem(); +} + static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; @@ -1012,7 +1058,10 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->base + offset); - /* FIXME: conditionally perform read-back if mandated by firmware */ + wmb_blk(nfit_blk); + + if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + readq(mmio->base + offset); } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1026,7 +1075,6 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + lane * mmio->size; - /* TODO: non-temporal access, flush hints, cache management etc... */ write_blk_ctl(nfit_blk, lane, dpa, len, rw); while (len) { unsigned int c; @@ -1045,13 +1093,19 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy(mmio->aperture + offset, iobuf + copied, c); + memcpy_to_pmem(mmio->aperture + offset, + iobuf + copied, c); else - memcpy(iobuf + copied, mmio->aperture + offset, c); + memcpy_from_pmem(iobuf + copied, + mmio->aperture + offset, c); copied += c; len -= c; } + + if (rw) + wmb_blk(nfit_blk); + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; } @@ -1124,7 +1178,7 @@ static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, } static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { resource_size_t start = spa->address; resource_size_t n = spa->length; @@ -1152,8 +1206,15 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - /* TODO: cacheability based on the spa type */ - spa_map->iomem = ioremap_nocache(start, n); + if (type == SPA_MAP_APERTURE) { + /* + * TODO: memremap_pmem() support, but that requires cache + * flushing when the aperture is moved. + */ + spa_map->iomem = ioremap_wc(start, n); + } else + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) goto err_map; @@ -1171,6 +1232,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges * @nvdimm_bus: NFIT-bus that provided the spa table entry * @nfit_spa: spa table to map + * @type: aperture or control region * * In the case where block-data-window apertures and * dimm-control-regions are interleaved they will end up sharing a @@ -1180,12 +1242,12 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * unbound. */ static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { void __iomem *iomem; mutex_lock(&acpi_desc->spa_map_mutex); - iomem = __nfit_spa_map(acpi_desc, spa); + iomem = __nfit_spa_map(acpi_desc, spa, type); mutex_unlock(&acpi_desc->spa_map_mutex); return iomem; @@ -1206,12 +1268,35 @@ static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, return 0; } +static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, struct nfit_blk *nfit_blk) +{ + struct nd_cmd_dimm_flags flags; + int rc; + + memset(&flags, 0, sizeof(flags)); + rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, + sizeof(flags)); + + if (rc >= 0 && flags.status == 0) + nfit_blk->dimm_flags = flags.flags; + else if (rc == -ENOTTY) { + /* fall back to a conservative default */ + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + rc = 0; + } else + rc = -ENXIO; + + return rc; +} + static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_flush *nfit_flush; struct nfit_blk_mmio *mmio; struct nfit_blk *nfit_blk; struct nfit_mem *nfit_mem; @@ -1223,8 +1308,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { dev_dbg(dev, "%s: missing%s%s%s\n", __func__, nfit_mem ? "" : " nfit_mem", - nfit_mem->dcr ? "" : " dcr", - nfit_mem->bdw ? "" : " bdw"); + (nfit_mem && nfit_mem->dcr) ? "" : " dcr", + (nfit_mem && nfit_mem->bdw) ? "" : " bdw"); return -ENXIO; } @@ -1237,7 +1322,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + SPA_MAP_APERTURE); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); @@ -1259,7 +1345,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + SPA_MAP_CONTROL); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); @@ -1277,6 +1364,24 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); + if (rc < 0) { + dev_dbg(dev, "%s: %s failed get DIMM flags\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + nfit_flush = nfit_mem->nfit_flush; + if (nfit_flush && nfit_flush->flush->hint_count != 0) { + nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, + nfit_flush->flush->hint_address[0], 8); + if (!nfit_blk->nvdimm_flush) + return -ENOMEM; + } + + if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + if (mmio->line_size == 0) return 0; @@ -1459,6 +1564,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) INIT_LIST_HEAD(&acpi_desc->dcrs); INIT_LIST_HEAD(&acpi_desc->bdws); INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); INIT_LIST_HEAD(&acpi_desc->memdevs); INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 81f2e8c5a79c..79b6d83875c1 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,10 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum { + ND_BLK_DCR_LATCH = 2, +}; + struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; @@ -60,6 +64,11 @@ struct nfit_idt { struct list_head list; }; +struct nfit_flush { + struct acpi_nfit_flush_address *flush; + struct list_head list; +}; + struct nfit_memdev { struct acpi_nfit_memory_map *memdev; struct list_head list; @@ -77,6 +86,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; @@ -88,6 +98,7 @@ struct acpi_nfit_desc { struct mutex spa_map_mutex; struct list_head spa_maps; struct list_head memdevs; + struct list_head flushes; struct list_head dimms; struct list_head spas; struct list_head dcrs; @@ -109,7 +120,7 @@ struct nfit_blk { struct nfit_blk_mmio { union { void __iomem *base; - void *aperture; + void __pmem *aperture; }; u64 size; u64 base_offset; @@ -123,6 +134,13 @@ struct nfit_blk { u64 bdw_offset; /* post interleave offset */ u64 stat_offset; u64 cmd_offset; + void __iomem *nvdimm_flush; + u32 dimm_flags; +}; + +enum spa_map_type { + SPA_MAP_CONTROL, + SPA_MAP_APERTURE, }; struct nfit_spa_mapping { diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a5dc9034efee..3b8963f21b36 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,10 +175,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -207,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -530,6 +537,19 @@ acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) } #endif +#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE +static bool acpi_rev_override; + +int __init acpi_rev_override_setup(char *str) +{ + acpi_rev_override = true; + return 1; +} +__setup("acpi_rev_override", acpi_rev_override_setup); +#else +#define acpi_rev_override false +#endif + #define ACPI_MAX_OVERRIDE_LEN 100 static char acpi_os_name[ACPI_MAX_OVERRIDE_LEN]; @@ -548,6 +568,11 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, *new_val = acpi_os_name; } + if (!memcmp(init_val->name, "_REV", 4) && acpi_rev_override) { + printk(KERN_INFO PREFIX "Overriding _REV return value to 5\n"); + *new_val = (char *)5; + } + return AE_OK; } @@ -1844,7 +1869,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include <linux/device.h> #include <linux/export.h> #include <linux/ioport.h> -#include <linux/list.h> #include <linux/slab.h> #ifdef CONFIG_X86 @@ -622,164 +621,3 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2649a068671d..ec256352f423 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1019,6 +1019,29 @@ static bool acpi_of_match_device(struct acpi_device *adev, return false; } +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + static const struct acpi_device_id *__acpi_match_device( struct acpi_device *device, const struct acpi_device_id *ids, @@ -1036,9 +1059,12 @@ static const struct acpi_device_id *__acpi_match_device( list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0]; id++) - if (!strcmp((char *) id->id, hwid->id)) + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the @@ -2101,6 +2127,8 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, if (info->valid & ACPI_VALID_UID) pnp->unique_id = kstrdup(info->unique_id.string, GFP_KERNEL); + if (info->valid & ACPI_VALID_CLS) + acpi_add_id(pnp, info->class_code.string); kfree(info); diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 6d17a3b65ef7..15e40ee62a94 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -48,7 +48,7 @@ config ATA_VERBOSE_ERROR config ATA_ACPI bool "ATA ACPI Support" - depends on ACPI && PCI + depends on ACPI default y help This option adds support for ATA-related ACPI objects. diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 614c78f510f0..1befb114c384 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -20,6 +20,8 @@ #include <linux/platform_device.h> #include <linux/libata.h> #include <linux/ahci_platform.h> +#include <linux/acpi.h> +#include <linux/pci_ids.h> #include "ahci.h" #define DRV_NAME "ahci" @@ -79,12 +81,19 @@ static const struct of_device_id ahci_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_of_match); +static const struct acpi_device_id ahci_acpi_match[] = { + { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); + static struct platform_driver ahci_driver = { .probe = ahci_probe, .remove = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_of_match, + .acpi_match_table = ahci_acpi_match, .pm = &ahci_pm_ops, }, }; diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 9c4288362a8e..894bda114224 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -563,10 +563,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -577,6 +575,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cdd547bd67df..0ee43c1056e0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,6 +6,7 @@ * This file is released under the GPLv2. */ +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/io.h> #include <linux/platform_device.h> @@ -19,6 +20,8 @@ #include <linux/suspend.h> #include <linux/export.h> +#define GENPD_RETRY_MAX_MS 250 /* Approximate */ + #define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ ({ \ type (*__routine)(struct device *__d); \ @@ -2131,6 +2134,7 @@ EXPORT_SYMBOL_GPL(of_genpd_get_from_provider); static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + unsigned int i; int ret = 0; pd = pm_genpd_lookup_dev(dev); @@ -2139,10 +2143,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_remove_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } @@ -2183,6 +2189,7 @@ int genpd_dev_pm_attach(struct device *dev) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; + unsigned int i; int ret; if (!dev->of_node) @@ -2218,10 +2225,12 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_add_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 7470004ca810..eb6e67451dec 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -45,14 +45,12 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, return -EEXIST; } - dev->power.wakeirq = wirq; - spin_unlock_irqrestore(&dev->power.lock, flags); - err = device_wakeup_attach_irq(dev, wirq); - if (err) - return err; + if (!err) + dev->power.wakeirq = wirq; - return 0; + spin_unlock_irqrestore(&dev->power.lock, flags); + return err; } /** @@ -105,10 +103,10 @@ void dev_pm_clear_wake_irq(struct device *dev) return; spin_lock_irqsave(&dev->power.lock, flags); + device_wakeup_detach_irq(dev); dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - device_wakeup_detach_irq(dev); if (wirq->dedicated_irq) free_irq(wirq->irq, wirq); kfree(wirq); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 40f71603378c..51f15bc15774 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -281,32 +281,25 @@ EXPORT_SYMBOL_GPL(device_wakeup_enable); * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. + * + * Call under the device's power.lock lock. */ int device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; - int ret = 0; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; if (!ws) { dev_err(dev, "forgot to call call device_init_wakeup?\n"); - ret = -EINVAL; - goto unlock; + return -EINVAL; } - if (ws->wakeirq) { - ret = -EEXIST; - goto unlock; - } + if (ws->wakeirq) + return -EEXIST; ws->wakeirq = wakeirq; - -unlock: - spin_unlock_irq(&dev->power.lock); - - return ret; + return 0; } /** @@ -314,20 +307,16 @@ unlock: * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. + * + * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; - if (!ws) - goto unlock; - - ws->wakeirq = NULL; - -unlock: - spin_unlock_irq(&dev->power.lock); + if (ws) + ws->wakeirq = NULL; } /** diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index a6ee3d750c30..6b88a35fb048 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -419,14 +419,6 @@ static int in_flight_summary_show(struct seq_file *m, void *pos) return 0; } -/* simple_positive(file->f_path.dentry) respectively debugfs_positive(), - * but neither is "reachable" from here. - * So we have our own inline version of it above. :-( */ -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - /* make sure at *open* time that the respective object won't go away. */ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data, struct kref *kref, @@ -444,7 +436,7 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo /* serialize with d_delete() */ mutex_lock(&d_inode(parent)->i_mutex); /* Make sure the object is still alive */ - if (debugfs_positive(file->f_path.dentry) + if (simple_positive(file->f_path.dentry) && kref_get_unless_zero(kref)) ret = 0; mutex_unlock(&d_inode(parent)->i_mutex); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 40580dc7f41c..f7a4c9d7f721 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -588,7 +588,7 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) spin_lock_irq(&lo->lo_lock); if (lo->lo_backing_file) - p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); + p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1); spin_unlock_irq(&lo->lo_lock); if (IS_ERR_OR_NULL(p)) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 34338d7438f5..d1d6141920d3 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1474,6 +1474,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; + nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; /* make sure queue descriptor is set before queue count, for kthread */ @@ -1726,8 +1727,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq->cq_vector = 0; result = queue_request_irq(dev, nvmeq, nvmeq->irqname); - if (result) + if (result) { + nvmeq->cq_vector = -1; goto free_nvmeq; + } return result; @@ -2213,8 +2216,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->max_qid = nr_io_queues; result = queue_request_irq(dev, adminq, adminq->irqname); - if (result) + if (result) { + adminq->cq_vector = -1; goto free_queues; + } /* Free previously allocated queues that are no longer usable */ nvme_free_queues(dev, nr_io_queues + 1); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ec6c5c6e1ac9..d94529d5c8e9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -346,6 +346,7 @@ struct rbd_device { struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; + struct rbd_options *opts; char *header_name; @@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) } /* - * mount options + * (Per device) rbd map options */ enum { + Opt_queue_depth, Opt_last_int, /* int args above */ Opt_last_string, /* string args above */ Opt_read_only, Opt_read_write, - /* Boolean args above */ - Opt_last_bool, + Opt_err }; static match_table_t rbd_opts_tokens = { + {Opt_queue_depth, "queue_depth=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ - /* Boolean args above */ - {-1, NULL} + {Opt_err, NULL} }; struct rbd_options { + int queue_depth; bool read_only; }; +#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_READ_ONLY_DEFAULT false static int parse_rbd_opts_token(char *c, void *private) @@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private) int token, intval, ret; token = match_token(c, rbd_opts_tokens, argstr); - if (token < 0) - return -EINVAL; - if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad mount option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else if (token > Opt_last_string && token < Opt_last_bool) { - dout("got Boolean token %d\n", token); + dout("got string token %d val %s\n", token, argstr[0].from); } else { dout("got token %d\n", token); } switch (token) { + case Opt_queue_depth: + if (intval < 1) { + pr_err("queue_depth out of range\n"); + return -EINVAL; + } + rbd_opts->queue_depth = intval; + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private) rbd_opts->read_only = false; break; default: - rbd_assert(false); - break; + /* libceph prints "bad option" msg */ + return -EINVAL; } + return 0; } @@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request) /* * Wait for an object request to complete. If interrupted, cancel the * underlying osd request. + * + * @timeout: in jiffies, 0 means "wait forever" */ -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, + unsigned long timeout) { - int ret; + long ret; dout("%s %p\n", __func__, obj_request); - - ret = wait_for_completion_interruptible(&obj_request->completion); - if (ret < 0) { - dout("%s %p interrupted\n", __func__, obj_request); + ret = wait_for_completion_interruptible_timeout( + &obj_request->completion, + ceph_timeout_jiffies(timeout)); + if (ret <= 0) { + if (ret == 0) + ret = -ETIMEDOUT; rbd_obj_request_end(obj_request); - return ret; + } else { + ret = 0; } - dout("%s %p done\n", __func__, obj_request); - return 0; + dout("%s %p ret %d\n", __func__, obj_request, (int)ret); + return ret; +} + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +{ + return __rbd_obj_request_wait(obj_request, 0); +} + +static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request, + unsigned long timeout) +{ + return __rbd_obj_request_wait(obj_request, timeout); } static void rbd_img_request_complete(struct rbd_img_request *img_request) @@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - name = kmalloc(size, GFP_KERNEL); + name = kmalloc(size, GFP_NOIO); if (!name) return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) { kfree(name); return NULL; @@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, } if (opcode == CEPH_OSD_OP_DELETE) - osd_req_op_init(osd_request, num_ops, opcode); + osd_req_op_init(osd_request, num_ops, opcode, 0); else osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, 0, 0); @@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) goto out; stat_request->callback = rbd_img_obj_exists_callback; - osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0); osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, false, false); rbd_osd_req_format_read(stat_request); @@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( bool watch) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_options *opts = osdc->client->options; struct rbd_obj_request *obj_request; int ret; @@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( if (ret) goto out; - ret = rbd_obj_request_wait(obj_request); + ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout); if (ret) goto out; @@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); rbd_dev->tag_set.ops = &rbd_mq_ops; - rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; + rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; - rbd_dev->tag_set.flags = - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; rbd_dev->tag_set.nr_hw_queues = 1; rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); @@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) /* set io sizes to object size */ segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, segment_size / SECTOR_SIZE); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); @@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref) } static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, - struct rbd_spec *spec) + struct rbd_spec *spec, + struct rbd_options *opts) { struct rbd_device *rbd_dev; @@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); - rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + rbd_dev->spec = spec; + rbd_dev->opts = opts; /* Initialize the layout used for all rbd requests */ @@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev) { rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev->opts); kfree(rbd_dev); } @@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf, goto out_mem; rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, @@ -4963,8 +4989,8 @@ out_err: */ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) { + struct ceph_options *opts = rbdc->client->options; u64 newest_epoch; - unsigned long timeout = rbdc->client->options->mount_timeout * HZ; int tries = 0; int ret; @@ -4979,7 +5005,8 @@ again: if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { ceph_monc_request_next_osdmap(&rbdc->client->monc); (void) ceph_monc_wait_osdmap(&rbdc->client->monc, - newest_epoch, timeout); + newest_epoch, + opts->mount_timeout); goto again; } else { /* the osdmap we have is new enough */ @@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) rbdc = __rbd_get_client(rbd_dev->rbd_client); ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); + parent = rbd_dev_create(rbdc, parent_spec, NULL); if (!parent) goto out_err; @@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) goto err_out_module; - read_only = rbd_opts->read_only; - kfree(rbd_opts); - rbd_opts = NULL; /* done with this */ rbdc = rbd_get_client(ceph_opts); if (IS_ERR(rbdc)) { @@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_client; } - rbd_dev = rbd_dev_create(rbdc, spec); + rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); if (!rbd_dev) goto err_out_client; rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ + rbd_opts = NULL; /* rbd_dev now owns this */ rc = rbd_dev_image_probe(rbd_dev, true); if (rc < 0) @@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, /* If we are mapping a snapshot it must be marked read-only */ + read_only = rbd_dev->opts->read_only; if (rbd_dev->spec->snap_id != CEPH_NOSNAP) read_only = true; rbd_dev->mapping.read_only = read_only; @@ -5458,6 +5484,7 @@ err_out_client: rbd_put_client(rbdc); err_out_args: rbd_spec_put(spec); + kfree(rbd_opts); err_out_module: module_put(THIS_MODULE); diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 0b4188b9af7c..c6dea3f6917b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -581,7 +581,7 @@ static inline int needs_ilk_vtd_wa(void) /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || + if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG || gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) && intel_iommu_gfx_mapped) return 1; diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c index 152dcb3f7b5f..61566bcefa53 100644 --- a/drivers/clk/at91/clk-h32mx.c +++ b/drivers/clk/at91/clk-h32mx.c @@ -116,8 +116,10 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, h32mxclk->pmc = pmc; clk = clk_register(NULL, &h32mxclk->hw); - if (!clk) + if (!clk) { + kfree(h32mxclk); return; + } of_clk_add_provider(np, of_clk_src_simple_get, clk); } diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index c2400456a044..27dfa965cfed 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -171,8 +171,10 @@ at91_clk_register_main_osc(struct at91_pmc *pmc, irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); ret = request_irq(osc->irq, clk_main_osc_irq_handler, IRQF_TRIGGER_HIGH, name, osc); - if (ret) + if (ret) { + kfree(osc); return ERR_PTR(ret); + } if (bypass) pmc_write(pmc, AT91_CKGR_MOR, diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index f98eafe9b12d..5b3ded5205a2 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -165,12 +165,16 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(master->irq, IRQ_NOAUTOEN); ret = request_irq(master->irq, clk_master_irq_handler, IRQF_TRIGGER_HIGH, "clk-master", master); - if (ret) + if (ret) { + kfree(master); return ERR_PTR(ret); + } clk = clk_register(NULL, &master->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(master->irq, master); kfree(master); + } return clk; } diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index cbbe40377ad6..18b60f4895a6 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -346,12 +346,16 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, irq_set_status_flags(pll->irq, IRQ_NOAUTOEN); ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH, id ? "clk-pllb" : "clk-plla", pll); - if (ret) + if (ret) { + kfree(pll); return ERR_PTR(ret); + } clk = clk_register(NULL, &pll->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(pll->irq, pll); kfree(pll); + } return clk; } diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index a76d03fd577b..58008b3e8bc1 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -130,13 +130,17 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name, irq_set_status_flags(sys->irq, IRQ_NOAUTOEN); ret = request_irq(sys->irq, clk_system_irq_handler, IRQF_TRIGGER_HIGH, name, sys); - if (ret) + if (ret) { + kfree(sys); return ERR_PTR(ret); + } } clk = clk_register(NULL, &sys->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(sys->irq, sys); kfree(sys); + } return clk; } diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index ae3263bc1476..30dd697b1668 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -118,12 +118,16 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN); ret = request_irq(utmi->irq, clk_utmi_irq_handler, IRQF_TRIGGER_HIGH, "clk-utmi", utmi); - if (ret) + if (ret) { + kfree(utmi); return ERR_PTR(ret); + } clk = clk_register(NULL, &utmi->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(utmi->irq, utmi); kfree(utmi); + } return clk; } diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index e19c09cd9645..f630e1bbdcfe 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -222,10 +222,6 @@ void __init iproc_asiu_setup(struct device_node *node, struct iproc_asiu_clk *asiu_clk; const char *clk_name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -259,7 +255,7 @@ void __init iproc_asiu_setup(struct device_node *node, err_clk_register: for (i = 0; i < num_clks; i++) - kfree(asiu->clks[i].name); + clk_unregister(asiu->clk_data.clks[i]); iounmap(asiu->gate_base); err_iomap_gate: diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 46fb84bc2674..2dda4e8295a9 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -366,7 +366,7 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, val = readl(pll->pll_base + ctrl->ndiv_int.offset); ndiv_int = (val >> ctrl->ndiv_int.shift) & bit_mask(ctrl->ndiv_int.width); - ndiv = ndiv_int << ctrl->ndiv_int.shift; + ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift; if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) { val = readl(pll->pll_base + ctrl->ndiv_frac.offset); @@ -374,7 +374,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, bit_mask(ctrl->ndiv_frac.width); if (ndiv_frac != 0) - ndiv = (ndiv_int << ctrl->ndiv_int.shift) | ndiv_frac; + ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) | + ndiv_frac; } val = readl(pll->pll_base + ctrl->pdiv.offset); @@ -655,10 +656,6 @@ void __init iproc_pll_clk_setup(struct device_node *node, memset(&init, 0, sizeof(init)); parent_name = node->name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -690,10 +687,8 @@ void __init iproc_pll_clk_setup(struct device_node *node, return; err_clk_register: - for (i = 0; i < num_clks; i++) { - kfree(pll->clks[i].name); + for (i = 0; i < num_clks; i++) clk_unregister(pll->clk_data.clks[i]); - } err_pll_register: if (pll->asiu_base) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index b9b12a742970..3f6f7ad39490 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -268,7 +268,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary) memcpy(table, stm32f42xx_gate_map, sizeof(table)); /* only bits set in table can be used as indices */ - if (WARN_ON(secondary > 8 * sizeof(table) || + if (WARN_ON(secondary >= BITS_PER_BYTE * sizeof(table) || 0 == (table[BIT_ULL_WORD(secondary)] & BIT_ULL_MASK(secondary)))) return -EINVAL; diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 4b9e04cdf7e8..8b6523d15fb8 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -700,6 +700,22 @@ static const struct mtk_composite peri_clks[] __initconst = { MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1), }; +static struct clk_onecell_data *mt8173_top_clk_data __initdata; +static struct clk_onecell_data *mt8173_pll_clk_data __initdata; + +static void __init mtk_clk_enable_critical(void) +{ + if (!mt8173_top_clk_data || !mt8173_pll_clk_data) + return; + + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA7PLL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_MEM_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_DDRPHYCFG_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_CCI400_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_RTC_SEL]); +} + static void __init mtk_topckgen_init(struct device_node *node) { struct clk_onecell_data *clk_data; @@ -712,19 +728,19 @@ static void __init mtk_topckgen_init(struct device_node *node) return; } - clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); + mt8173_top_clk_data = clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8173_clk_lock, clk_data); - clk_prepare_enable(clk_data->clks[CLK_TOP_CCI400_SEL]); - r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); if (r) pr_err("%s(): could not register clock provider: %d\n", __func__, r); + + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_topckgen, "mediatek,mt8173-topckgen", mtk_topckgen_init); @@ -818,13 +834,13 @@ static void __init mtk_apmixedsys_init(struct device_node *node) { struct clk_onecell_data *clk_data; - clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); + mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); if (!clk_data) return; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - clk_prepare_enable(clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_apmixedsys, "mediatek,mt8173-apmixedsys", mtk_apmixedsys_init); diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7..92936f0912d2 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index 657ca14ba709..8dd8cce27361 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -190,7 +190,7 @@ static struct clk *clk_register_flexgen(const char *name, init.name = name; init.ops = &flexgen_ops; - init.flags = CLK_IS_BASIC | flexgen_flags; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE | flexgen_flags; init.parent_names = parent_names; init.num_parents = num_parents; @@ -303,6 +303,8 @@ static void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index e94197f04b0b..d9eb2e1d8471 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, @@ -489,7 +489,7 @@ static int quadfs_pll_is_enabled(struct clk_hw *hw) struct st_clk_quadfs_pll *pll = to_quadfs_pll(hw); u32 npda = CLKGEN_READ(pll, npda); - return !!npda; + return pll->data->powerup_polarity ? !npda : !!npda; } static int clk_fs660c32_vco_get_rate(unsigned long input, struct stm_fs *fs, @@ -635,7 +635,7 @@ static struct clk * __init st_clk_register_quadfs_pll( init.name = name; init.ops = quadfs->pll_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; @@ -774,7 +774,7 @@ static void quadfs_fsynth_disable(struct clk_hw *hw) if (fs->lock) spin_lock_irqsave(fs->lock, flags); - CLKGEN_WRITE(fs, nsb[fs->chan], !fs->data->standby_polarity); + CLKGEN_WRITE(fs, nsb[fs->chan], fs->data->standby_polarity); if (fs->lock) spin_unlock_irqrestore(fs->lock, flags); @@ -1082,10 +1082,6 @@ static const struct of_device_id quadfs_of_match[] = { .compatible = "st,stih407-quadfs660-D", .data = &st_fs660c32_D_407 }, - { - .compatible = "st,stih407-quadfs660-D", - .data = (void *)&st_fs660c32_D_407 - }, {} }; diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index 4fbe6e099587..717c4a91a17b 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -237,7 +237,7 @@ static struct clk *clk_register_genamux(const char *name, init.name = name; init.ops = &clkgena_divmux_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = parent_names; init.num_parents = num_parents; @@ -513,7 +513,8 @@ static void __init st_of_clkgena_prediv_setup(struct device_node *np) 0, &clk_name)) return; - clk = clk_register_divider_table(NULL, clk_name, parent_name, 0, + clk = clk_register_divider_table(NULL, clk_name, parent_name, + CLK_GET_RATE_NOCACHE, reg + data->offset, data->shift, 1, 0, data->table, NULL); if (IS_ERR(clk)) @@ -582,7 +583,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; @@ -786,7 +787,8 @@ static void __init st_of_clkgen_vcc_setup(struct device_node *np) &mux->hw, &clk_mux_ops, &div->hw, &clk_divider_ops, &gate->hw, &clk_gate_ops, - data->clk_flags); + data->clk_flags | + CLK_GET_RATE_NOCACHE); if (IS_ERR(clk)) { kfree(gate); kfree(div); diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 106532207213..72d1c27eaffa 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -406,7 +406,7 @@ static struct clk * __init clkgen_pll_register(const char *parent_name, init.name = clk_name; init.ops = pll_data->ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 9a82f17d2d73..abf7b37faf73 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -1391,6 +1391,7 @@ static void __init sun6i_init_clocks(struct device_node *node) CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks); CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks); CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks); +CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks); static void __init sun9i_init_clocks(struct device_node *node) { diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 879c78423546..2d59038dec43 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -529,6 +529,7 @@ static void __init imx6dl_timer_init_dt(struct device_node *np) CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index fc897babab55..e362860c2b50 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -3,7 +3,7 @@ * * The 2E revision of loongson processor not support this feature. * - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 5bcd575fa96f..e6b658faef63 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1034,8 +1034,8 @@ static int mv_cesa_get_sram(struct platform_device *pdev, &sram_size); cp->sram_size = sram_size; - cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node, - "marvell,crypto-srams", 0); + cp->sram_pool = of_gen_pool_get(pdev->dev.of_node, + "marvell,crypto-srams", 0); if (cp->sram_pool) { cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size, &cp->sram_dma); diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 67f80813a06f..e4311ce0cd78 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -494,8 +494,9 @@ out: static int ccm4309_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); @@ -525,8 +526,9 @@ static int ccm_aes_nx_encrypt(struct aead_request *req) static int ccm4309_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct blkcipher_desc desc; - u8 *iv = nx_ctx->priv.ccm.iv; + u8 *iv = rctx->iv; iv[0] = 3; memcpy(iv + 1, nx_ctx->priv.ccm.nonce, 3); diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c index 2617cd4d54dd..dd7e9f3f5b6b 100644 --- a/drivers/crypto/nx/nx-aes-ctr.c +++ b/drivers/crypto/nx/nx-aes-ctr.c @@ -72,7 +72,7 @@ static int ctr3686_aes_nx_set_key(struct crypto_tfm *tfm, if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; - memcpy(nx_ctx->priv.ctr.iv, + memcpy(nx_ctx->priv.ctr.nonce, in_key + key_len - CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); @@ -131,14 +131,15 @@ static int ctr3686_aes_nx_crypt(struct blkcipher_desc *desc, unsigned int nbytes) { struct nx_crypto_ctx *nx_ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *iv = nx_ctx->priv.ctr.iv; + u8 iv[16]; + memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE); memcpy(iv + CTR_RFC3686_NONCE_SIZE, desc->info, CTR_RFC3686_IV_SIZE); iv[12] = iv[13] = iv[14] = 0; iv[15] = 1; - desc->info = nx_ctx->priv.ctr.iv; + desc->info = iv; return ctr_aes_nx_crypt(desc, dst, src, nbytes); } diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 08ac6d48688c..92c993f08213 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -317,6 +317,7 @@ out: static int gcm_aes_nx_crypt(struct aead_request *req, int enc) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); + struct nx_gcm_rctx *rctx = aead_request_ctx(req); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct blkcipher_desc desc; unsigned int nbytes = req->cryptlen; @@ -326,7 +327,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) spin_lock_irqsave(&nx_ctx->lock, irq_flags); - desc.info = nx_ctx->priv.gcm.iv; + desc.info = rctx->iv; /* initialize the counter */ *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1; @@ -424,8 +425,8 @@ out: static int gcm_aes_nx_encrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -434,8 +435,8 @@ static int gcm_aes_nx_encrypt(struct aead_request *req) static int gcm_aes_nx_decrypt(struct aead_request *req) { - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; memcpy(iv, req->iv, 12); @@ -445,7 +446,8 @@ static int gcm_aes_nx_decrypt(struct aead_request *req) static int gcm4106_aes_nx_encrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); @@ -457,7 +459,8 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req) static int gcm4106_aes_nx_decrypt(struct aead_request *req) { struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm); - char *iv = nx_ctx->priv.gcm.iv; + struct nx_gcm_rctx *rctx = aead_request_ctx(req); + char *iv = rctx->iv; char *nonce = nx_ctx->priv.gcm.nonce; memcpy(iv, nonce, NX_GCM4106_NONCE_LEN); diff --git a/drivers/crypto/nx/nx-aes-xcbc.c b/drivers/crypto/nx/nx-aes-xcbc.c index 8c2faffab4a3..c2f7d4befb55 100644 --- a/drivers/crypto/nx/nx-aes-xcbc.c +++ b/drivers/crypto/nx/nx-aes-xcbc.c @@ -42,6 +42,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, unsigned int key_len) { struct nx_crypto_ctx *nx_ctx = crypto_shash_ctx(desc); + struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; switch (key_len) { case AES_KEYSIZE_128: @@ -51,7 +52,7 @@ static int nx_xcbc_set_key(struct crypto_shash *desc, return -EINVAL; } - memcpy(nx_ctx->priv.xcbc.key, in_key, key_len); + memcpy(csbcpb->cpb.aes_xcbc.key, in_key, key_len); return 0; } @@ -148,32 +149,29 @@ out: return rc; } -static int nx_xcbc_init(struct shash_desc *desc) +static int nx_crypto_ctx_aes_xcbc_init2(struct crypto_tfm *tfm) { - struct xcbc_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; - struct nx_sg *out_sg; - int len; + int err; - nx_ctx_init(nx_ctx, HCOP_FC_AES); + err = nx_crypto_ctx_aes_xcbc_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_AES); NX_CPB_SET_KEY_SIZE(csbcpb, NX_KS_AES_128); csbcpb->cpb.hdr.mode = NX_MODE_AES_XCBC_MAC; - memcpy(csbcpb->cpb.aes_xcbc.key, nx_ctx->priv.xcbc.key, AES_BLOCK_SIZE); - memset(nx_ctx->priv.xcbc.key, 0, sizeof *nx_ctx->priv.xcbc.key); - - len = AES_BLOCK_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, nx_ctx->ap->sglen); + return 0; +} - if (len != AES_BLOCK_SIZE) - return -EINVAL; +static int nx_xcbc_init(struct shash_desc *desc) +{ + struct xcbc_state *sctx = shash_desc_ctx(desc); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + memset(sctx, 0, sizeof *sctx); return 0; } @@ -186,6 +184,7 @@ static int nx_xcbc_update(struct shash_desc *desc, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u32 to_process = 0, leftover, total; unsigned int max_sg_len; unsigned long irq_flags; @@ -213,6 +212,17 @@ static int nx_xcbc_update(struct shash_desc *desc, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = AES_BLOCK_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, nx_ctx->ap->sglen); + + if (data_len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } + + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + do { to_process = total - to_process; to_process = to_process & ~(AES_BLOCK_SIZE - 1); @@ -235,8 +245,10 @@ static int nx_xcbc_update(struct shash_desc *desc, (u8 *) sctx->buffer, &data_len, max_sg_len); - if (data_len != sctx->count) - return -EINVAL; + if (data_len != sctx->count) { + rc = -EINVAL; + goto out; + } } data_len = to_process - sctx->count; @@ -245,8 +257,10 @@ static int nx_xcbc_update(struct shash_desc *desc, &data_len, max_sg_len); - if (data_len != to_process - sctx->count) - return -EINVAL; + if (data_len != to_process - sctx->count) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); @@ -325,15 +339,19 @@ static int nx_xcbc_final(struct shash_desc *desc, u8 *out) in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *)sctx->buffer, &len, nx_ctx->ap->sglen); - if (len != sctx->count) - return -EINVAL; + if (len != sctx->count) { + rc = -EINVAL; + goto out; + } len = AES_BLOCK_SIZE; out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, nx_ctx->ap->sglen); - if (len != AES_BLOCK_SIZE) - return -EINVAL; + if (len != AES_BLOCK_SIZE) { + rc = -EINVAL; + goto out; + } nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); @@ -372,7 +390,7 @@ struct shash_alg nx_shash_aes_xcbc_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_aes_xcbc_init, + .cra_init = nx_crypto_ctx_aes_xcbc_init2, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 4e91bdb83c59..08f8d5cd6334 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -29,34 +29,28 @@ #include "nx.h" -static int nx_sha256_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm) { - struct sha256_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA256]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA256_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha256_init(struct shash_desc *desc) { + struct sha256_state *sctx = shash_desc_ctx(desc); - if (len != SHA256_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -78,6 +72,7 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; @@ -108,6 +103,16 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA256_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -282,7 +287,7 @@ struct shash_alg nx_shash_sha256_alg = { .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha256_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index e6a58d2ee628..aff0fe58eac0 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -28,34 +28,29 @@ #include "nx.h" -static int nx_sha512_init(struct shash_desc *desc) +static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm) { - struct sha512_state *sctx = shash_desc_ctx(desc); - struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); - struct nx_sg *out_sg; - int len; - u32 max_sg_len; + struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(tfm); + int err; - nx_ctx_init(nx_ctx, HCOP_FC_SHA); + err = nx_crypto_ctx_sha_init(tfm); + if (err) + return err; - memset(sctx, 0, sizeof *sctx); + nx_ctx_init(nx_ctx, HCOP_FC_SHA); nx_ctx->ap = &nx_ctx->props[NX_PROPS_SHA512]; NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); + return 0; +} - len = SHA512_DIGEST_SIZE; - out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, - &len, max_sg_len); - nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); +static int nx_sha512_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); - if (len != SHA512_DIGEST_SIZE) - return -EINVAL; + memset(sctx, 0, sizeof *sctx); sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -77,6 +72,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; struct nx_sg *in_sg; + struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; @@ -107,6 +103,16 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, max_sg_len = min_t(u64, max_sg_len, nx_ctx->ap->databytelen/NX_PAGE_SIZE); + data_len = SHA512_DIGEST_SIZE; + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &data_len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); + + if (data_len != SHA512_DIGEST_SIZE) { + rc = -EINVAL; + goto out; + } + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -288,7 +294,7 @@ struct shash_alg nx_shash_sha512_alg = { .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_init = nx_crypto_ctx_sha_init, + .cra_init = nx_crypto_ctx_sha512_init, .cra_exit = nx_crypto_ctx_exit, } }; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index f6198f29a4a8..436971343ff7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -713,12 +713,15 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode) /* entry points from the crypto tfm initializers */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) { + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct nx_ccm_rctx)); return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, NX_MODE_AES_CCM); } int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm) { + crypto_aead_set_reqsize(tfm, sizeof(struct nx_gcm_rctx)); return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index de3ea8738146..cdff03a42ae7 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -2,6 +2,8 @@ #ifndef __NX_H__ #define __NX_H__ +#include <crypto/ctr.h> + #define NX_NAME "nx-crypto" #define NX_STRING "IBM Power7+ Nest Accelerator Crypto Driver" #define NX_VERSION "1.0" @@ -91,8 +93,11 @@ struct nx_crypto_driver { #define NX_GCM4106_NONCE_LEN (4) #define NX_GCM_CTR_OFFSET (12) -struct nx_gcm_priv { +struct nx_gcm_rctx { u8 iv[16]; +}; + +struct nx_gcm_priv { u8 iauth_tag[16]; u8 nonce[NX_GCM4106_NONCE_LEN]; }; @@ -100,8 +105,11 @@ struct nx_gcm_priv { #define NX_CCM_AES_KEY_LEN (16) #define NX_CCM4309_AES_KEY_LEN (19) #define NX_CCM4309_NONCE_LEN (3) -struct nx_ccm_priv { +struct nx_ccm_rctx { u8 iv[16]; +}; + +struct nx_ccm_priv { u8 b0[16]; u8 iauth_tag[16]; u8 oauth_tag[16]; @@ -113,7 +121,7 @@ struct nx_xcbc_priv { }; struct nx_ctr_priv { - u8 iv[16]; + u8 nonce[CTR_RFC3686_NONCE_SIZE]; }; struct nx_crypto_ctx { diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 46307098f8ba..0a70e46d5416 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -536,9 +536,6 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) dmaengine_terminate_all(dd->dma_lch_in); dmaengine_terminate_all(dd->dma_lch_out); - dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); - dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); - return err; } diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 7e98084d3645..afea7fc625cc 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -151,7 +151,7 @@ static int octeon_l2c_probe(struct platform_device *pdev) l2c->ctl_name = "octeon_l2c_err"; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_l2t_err l2t_err; union cvmx_l2d_err l2d_err; diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index bb19e0732681..cda6dab5067a 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -234,7 +234,7 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev) layers[0].size = 1; layers[0].is_virt_csrow = false; - if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + if (OCTEON_IS_OCTEON1PLUS()) { union cvmx_lmcx_mem_cfg0 cfg0; cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 0f83c33a7d1f..2ab6cf24c959 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -73,7 +73,7 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + if (OCTEON_IS_OCTEON2()) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index ca617f40574a..9fa8084a7c8d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -66,7 +66,6 @@ static int __init parse_efi_cmdline(char *str) early_param("efi", parse_efi_cmdline); struct kobject *efi_kobj; -static struct kobject *efivars_kobj; /* * Let's not leave out systab information that snuck into @@ -218,10 +217,9 @@ static int __init efisubsys_init(void) goto err_remove_group; /* and the standard mountpoint for efivarfs */ - efivars_kobj = kobject_create_and_add("efivars", efi_kobj); - if (!efivars_kobj) { + error = sysfs_create_mount_point(efi_kobj, "efivars"); + if (error) { pr_err("efivars: Subsystem registration failed.\n"); - error = -ENOMEM; goto err_remove_group; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 22866d1c3d69..01657830b470 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, struct amdgpu_fence **fence); +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible); int amdgpu_fence_wait_any(struct amdgpu_device *adev, struct amdgpu_fence **fences, bool intr); -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, - u64 *target_seq, bool intr, - long timeout); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); void amdgpu_fence_unref(struct amdgpu_fence **fence); @@ -1622,6 +1621,7 @@ struct amdgpu_vce { unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; + uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; struct delayed_work idle_work; const struct firmware *fw; /* VCE firmware */ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 36d34e0afbc3..f82a2dd83874 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_trace.h" static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, struct amdgpu_bo_list **result, @@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, gws_obj = entry->robj; if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; + + trace_amdgpu_bo_list_set(list, entry->robj); } for (i = 0; i < list->num_entries; ++i) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f09b2cba40ca..d63135bf29c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } p->chunks[i].chunk_id = user_chunk.chunk_id; p->chunks[i].length_dw = user_chunk.length_dw; - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) - p->num_ibs++; size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; @@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto out; } - if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) { + switch (p->chunks[i].chunk_id) { + case AMDGPU_CHUNK_ID_IB: + p->num_ibs++; + break; + + case AMDGPU_CHUNK_ID_FENCE: size = sizeof(struct drm_amdgpu_cs_chunk_fence); if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) { uint32_t handle; @@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) r = -EINVAL; goto out; } + break; + + case AMDGPU_CHUNK_ID_DEPENDENCIES: + break; + + default: + r = -EINVAL; + goto out; } } @@ -445,8 +456,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); + if (parser->ibs) + for (i = 0; i < parser->num_ibs; i++) + amdgpu_ib_free(parser->adev, &parser->ibs[i]); kfree(parser->ibs); if (parser->uf.bo) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); @@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return 0; } +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + struct amdgpu_ib *ib; + int i, j, r; + + if (!p->num_ibs) + return 0; + + /* Add dependencies to first IB */ + ib = &p->ibs[0]; + for (i = 0; i < p->nchunks; ++i) { + struct drm_amdgpu_cs_chunk_dep *deps; + struct amdgpu_cs_chunk *chunk; + unsigned num_deps; + + chunk = &p->chunks[i]; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) + continue; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (j = 0; j < num_deps; ++j) { + struct amdgpu_fence *fence; + struct amdgpu_ring *ring; + + r = amdgpu_cs_get_ring(adev, deps[j].ip_type, + deps[j].ip_instance, + deps[j].ring, &ring); + if (r) + return r; + + r = amdgpu_fence_recreate(ring, p->filp, + deps[j].handle, + &fence); + if (r) + return r; + + amdgpu_sync_fence(&ib->sync, fence); + amdgpu_fence_unref(&fence); + } + } + + return 0; +} + int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; @@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) else DRM_ERROR("Failed to process the buffer list %d!\n", r); } - } else { + } + + if (!r) { reserved_buffers = true; r = amdgpu_cs_ib_fill(adev, &parser); } + if (!r) + r = amdgpu_cs_dependencies(adev, &parser); + if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); up_read(&adev->exclusive_lock); @@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - uint64_t seq[AMDGPU_MAX_RINGS] = {0}; - struct amdgpu_ring *ring = NULL; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); + struct amdgpu_fence *fence = NULL; + struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; long r; @@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (r) return r; - seq[ring->idx] = wait->in.handle; + r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); + if (r) + return r; - r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout); + r = fence_wait_timeout(&fence->base, true, timeout); + amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fec487d1c870..ba46be361c9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } - + adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); + if (adev->ip_block_enabled == NULL) + return -ENOMEM; if (adev->ip_blocks == NULL) { DRM_ERROR("No IP blocks found!\n"); @@ -1575,8 +1577,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); - if (adev->ip_block_enabled) - kfree(adev->ip_block_enabled); + kfree(adev->ip_block_enabled); adev->ip_block_enabled = NULL; adev->accel_working = false; /* free i2c buses */ @@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor) void amdgpu_debugfs_cleanup(struct drm_minor *minor) { } +#else +static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + return 0; +} +static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5c9918d01bf9..a7189a1fa6a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -136,6 +136,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, } /** + * amdgpu_fence_recreate - recreate a fence from an user fence + * + * @ring: ring the fence is associated with + * @owner: creator of the fence + * @seq: user fence sequence number + * @fence: resulting amdgpu fence object + * + * Recreates a fence command from the user fence sequence number (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner, + uint64_t seq, struct amdgpu_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (seq > ring->fence_drv.sync_seq[ring->idx]) + return -EINVAL; + + *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + if ((*fence) == NULL) + return -ENOMEM; + + (*fence)->seq = seq; + (*fence)->ring = ring; + (*fence)->owner = owner; + fence_init(&(*fence)->base, &amdgpu_fence_ops, + &adev->fence_queue.lock, adev->fence_context + ring->idx, + (*fence)->seq); + return 0; +} + +/** * amdgpu_fence_check_signaled - callback from fence_queue * * this function is called with fence_queue lock held, which is also used @@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq) * the wait timeout, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected. */ -long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq, - bool intr, long timeout) +static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, + u64 *target_seq, bool intr, + long timeout) { uint64_t last_seq[AMDGPU_MAX_RINGS]; bool signaled; - int i, r; + int i; + long r; if (timeout == 0) { return amdgpu_fence_any_seq_signaled(adev, target_seq); @@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); - seq_printf(m, "--- ring %d ---\n", i); + seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%016llx\n", @@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { struct amdgpu_ring *other = adev->rings[j]; - if (i != j && other && other->fence_drv.initialized) + if (i != j && other && other->fence_drv.initialized && + ring->fence_drv.sync_seq[j]) seq_printf(m, "Last sync to ring %d 0x%016llx\n", j, ring->fence_drv.sync_seq[j]); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 0ec222295fee..ae43b58c9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -352,7 +352,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) if (((int64_t)timeout_ns) < 0) return MAX_SCHEDULE_TIMEOUT; - timeout = ktime_sub_ns(ktime_get(), timeout_ns); + timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get()); if (ktime_to_ns(timeout) < 0) return 0; @@ -496,7 +496,7 @@ error_unreserve: error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } @@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE); + invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); if ((args->flags & invalid_flags)) { dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", args->flags, invalid_flags); @@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } - if (!r) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) amdgpu_gem_va_update_vm(adev, bo_va); drm_gem_object_unreference_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index b56dd64bd4ea..961d7265c286 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs, TP_PROTO(struct amdgpu_cs_parser *p, int i), TP_ARGS(p, i), TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) __field(u32, dw) __field(u32, fences) ), TP_fast_assign( + __entry->bo_list = p->bo_list; __entry->ring = p->ibs[i].ring->idx; __entry->dw = p->ibs[i].length_dw; __entry->fences = amdgpu_fence_count_emitted( p->ibs[i].ring); ), - TP_printk("ring=%u, dw=%u, fences=%u", - __entry->ring, __entry->dw, + TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", + __entry->bo_list, __entry->ring, __entry->dw, __entry->fences) ); @@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) ); +TRACE_EVENT(amdgpu_vm_bo_map, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + +TRACE_EVENT(amdgpu_vm_bo_unmap, + TP_PROTO(struct amdgpu_bo_va *bo_va, + struct amdgpu_bo_va_mapping *mapping), + TP_ARGS(bo_va, mapping), + TP_STRUCT__entry( + __field(struct amdgpu_bo *, bo) + __field(long, start) + __field(long, last) + __field(u64, offset) + __field(u32, flags) + ), + + TP_fast_assign( + __entry->bo = bo_va->bo; + __entry->start = mapping->it.start; + __entry->last = mapping->it.last; + __entry->offset = mapping->offset; + __entry->flags = mapping->flags; + ), + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + __entry->bo, __entry->start, __entry->last, + __entry->offset, __entry->flags) +); + TRACE_EVENT(amdgpu_vm_bo_update, TP_PROTO(struct amdgpu_bo_va_mapping *mapping), TP_ARGS(mapping), @@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush, __entry->pd_addr, __entry->ring, __entry->id) ); +TRACE_EVENT(amdgpu_bo_list_set, + TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), + TP_ARGS(list, bo), + TP_STRUCT__entry( + __field(struct amdgpu_bo_list *, list) + __field(struct amdgpu_bo *, bo) + ), + + TP_fast_assign( + __entry->list = list; + __entry->bo = bo; + ), + TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) +); + DECLARE_EVENT_CLASS(amdgpu_fence_request, TP_PROTO(struct drm_device *dev, int ring, u32 seqno), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d3706a498293..dd3415d2e45d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1127a504f118..d3ca73090e39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * @p: parser context * @lo: address of lower dword * @hi: address of higher dword + * @size: minimum size * * Patch relocation inside command stream with real buffer address */ -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi) +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, + int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_bo *bo; uint64_t addr; + if (index == 0xffffffff) + index = 0; + addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr += ((uint64_t)size) * ((uint64_t)index); mapping = amdgpu_cs_find_mapping(p, addr, &bo); if (mapping == NULL) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n", + DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + addr, lo, hi, size, index); + return -EINVAL; + } + + if ((addr + (uint64_t)size) > + ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); + addr -= ((uint64_t)size) * ((uint64_t)index); ib->ptr[lo] = addr & 0xFFFFFFFF; ib->ptr[hi] = addr >> 32; @@ -494,6 +508,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int } /** + * amdgpu_vce_validate_handle - validate stream handle + * + * @p: parser context + * @handle: handle to validate + * @allocated: allocated a new handle? + * + * Validates the handle and return the found session index or -EINVAL + * we we don't have another free session index. + */ +static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, + uint32_t handle, bool *allocated) +{ + unsigned i; + + *allocated = false; + + /* validate the handle */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->adev->vce.handles[i]) == handle) { + if (p->adev->vce.filp[i] != p->filp) { + DRM_ERROR("VCE handle collision detected!\n"); + return -EINVAL; + } + return i; + } + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { + p->adev->vce.filp[i] = p->filp; + p->adev->vce.img_size[i] = 0; + *allocated = true; + return i; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + +/** * amdgpu_vce_cs_parse - parse and validate the command stream * * @p: parser context @@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { - uint32_t handle = 0; - bool destroy = false; - int i, r, idx = 0; struct amdgpu_ib *ib = &p->ibs[ib_idx]; + unsigned fb_idx = 0, bs_idx = 0; + int session_idx = -1; + bool destroyed = false; + bool created = false; + bool allocated = false; + uint32_t tmp, handle = 0; + uint32_t *size = &tmp; + int i, r = 0, idx = 0; amdgpu_vce_note_usage(p->adev); @@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); - return -EINVAL; + r = -EINVAL; + goto out; + } + + if (destroyed) { + DRM_ERROR("No other command allowed after destroy!\n"); + r = -EINVAL; + goto out; } switch (cmd) { case 0x00000001: // session handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + session_idx = amdgpu_vce_validate_handle(p, handle, + &allocated); + if (session_idx < 0) + return session_idx; + size = &p->adev->vce.img_size[session_idx]; break; case 0x00000002: // task info + fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); + bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + break; + case 0x01000001: // create + created = true; + if (!allocated) { + DRM_ERROR("Handle already in use!\n"); + r = -EINVAL; + goto out; + } + + *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * + amdgpu_get_ib_value(p, ib_idx, idx + 10) * + 8 * 3 / 2; + break; + case 0x04000001: // config extension case 0x04000002: // pic control case 0x04000005: // rate control @@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: // encode - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + *size, 0); if (r) - return r; + goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + *size / 3, 0); if (r) - return r; + goto out; break; case 0x02000001: // destroy - destroy = true; + destroyed = true; break; case 0x05000001: // context buffer + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + *size * 2, 0); + if (r) + goto out; + break; + case 0x05000004: // video bitstream buffer + tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp, bs_idx); + if (r) + goto out; + break; + case 0x05000005: // feedback buffer - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2); + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + 4096, fb_idx); if (r) - return r; + goto out; break; default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); - return -EINVAL; + r = -EINVAL; + goto out; } - idx += len / 4; - } - - if (destroy) { - /* IB contains a destroy msg, free the handle */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) - atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); + if (session_idx == -1) { + DRM_ERROR("no session command at start of IB\n"); + r = -EINVAL; + goto out; + } - return 0; + idx += len / 4; } - /* create or encode, validate the handle */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (atomic_read(&p->adev->vce.handles[i]) == handle) - return 0; + if (allocated && !created) { + DRM_ERROR("New session without create command!\n"); + r = -ENOENT; } - /* handle not found try to alloc a new one */ - for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { - if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { - p->adev->vce.filp[i] = p->filp; - return 0; - } +out: + if ((!r && destroyed) || (r && allocated)) { + /* + * IB contains a destroy msg or we have allocated an + * handle and got an error, anyway free the handle + */ + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) + atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0); } - DRM_ERROR("No more free VCE handles!\n"); - - return -EINVAL; + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index b6a9d0956c60..7ccdb5927da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct amdgpu_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 407882b233c7..9a4e3b63f1cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->mappings); interval_tree_insert(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_map(bo_va, mapping); bo_va->addr = 0; @@ -1058,6 +1059,7 @@ error_free: mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); error_unlock: @@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, mutex_lock(&vm->mutex); list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) { /* clear the old address */ @@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) { list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); + trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (bo_va->addr) list_add(&mapping->list, &vm->freed); else diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5dab578d6462..341c56681841 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 220865a44814..d19085a97064 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -552,4 +552,10 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ +enum { + MTYPE_CACHED = 0, + MTYPE_NONCACHED = 3 +}; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index e4936a452bc6..f75a31df30bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev) pi->mgcg_cgtt_local1 = 0x0; pi->clock_slow_down_step = 25000; pi->skip_clock_slow_down = 1; - pi->enable_nb_ps_policy = 1; + pi->enable_nb_ps_policy = 0; pi->caps_power_containment = true; pi->caps_cac = true; pi->didt_enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h index 782a74107664..99e1afc89629 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.h @@ -46,7 +46,7 @@ /* Do not change the following, it is also defined in SMU8.h */ #define SMU_EnabledFeatureScoreboard_AcpDpmOn 0x00000001 -#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00100000 +#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00200000 #define SMU_EnabledFeatureScoreboard_UvdDpmOn 0x00800000 #define SMU_EnabledFeatureScoreboard_VceDpmOn 0x01000000 diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5cde635978f9..6e77964f1b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3403,19 +3403,25 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v10_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v10_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 95efd98b202d..7f7abb0e0be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3402,19 +3402,25 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v11_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v11_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 72c27ac915f2..08387dfd98a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3237,19 +3237,25 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); @@ -3379,7 +3385,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask, int_control, tmp; unsigned hpd; - if (entry->src_data > 6) { + if (entry->src_data >= adev->mode_info.num_hpd) { DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index cb7907447b81..2c188fb9fd22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2010,6 +2010,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, } /** + * gmc_v7_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + cik_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +/** * gfx_v7_0_gpu_init - setup the 3D engine * * @adev: amdgpu_device pointer @@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v7_0_init_compute_vmid(adev); + WREG32(mmSX_DEBUG_1, 0x20); WREG32(mmTA_CNTL_AUX, 0x00010000); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 14242bd33363..7b683fb2173c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gmc_v8_0_init_compute_vmid - gart enable + * + * @rdev: amdgpu_device pointer + * + * Initialize compute vmid sh_mem registers + * + */ +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << + SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + SH_MEM_CONFIG__PRIVATE_ATC_MASK; + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + vi_srbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(mmSH_MEM_CONFIG, sh_mem_config); + WREG32(mmSH_MEM_APE1_BASE, 1); + WREG32(mmSH_MEM_APE1_LIMIT, 0); + WREG32(mmSH_MEM_BASES, sh_mem_bases); + } + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + gmc_v8_0_init_compute_vmid(adev); + mutex_lock(&adev->grbm_idx_mutex); /* * making sure that the following register writes will be broadcasted @@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); + 0x7FFFF << 2); } tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = ring->wptr; + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e3c1fde75363..7bb37b93993f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -439,6 +439,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) } /** + * sdma_v3_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VI). + */ +static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + for (i = 0; i < SDMA_MAX_INSTANCE; i++) { + f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); + if (enable) + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 1); + else + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 0); + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); + } +} + +/** * sdma_v3_0_enable - stop the async dma engines * * @adev: amdgpu_device pointer @@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) /* unhalt the MEs */ sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); @@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 90fc93c2c1d0..fa5a4448531d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } - adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL); - if (adev->ip_block_enabled == NULL) - return -ENOMEM; - return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8a1f999daa24..9be007081b72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -420,6 +420,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + if (pdd->reset_wavefronts) { dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; @@ -431,8 +437,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) * We don't call amd_iommu_unbind_pasid() here * because the IOMMU called us. */ - if (pdd) - pdd->bound = false; + pdd->bound = false; mutex_unlock(&p->mutex); } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index b69ed97d447c..b9ba06176eb1 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4732,7 +4732,7 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, return 0; if (edid) - size = EDID_LENGTH + (1 + edid->extensions); + size = EDID_LENGTH * (1 + edid->extensions); ret = drm_property_replace_global_blob(dev, &connector->edid_blob_ptr, diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8867818b1401..d65cbe6afb92 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,9 +157,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) struct drm_i915_gem_object *obj; int ret; - obj = i915_gem_object_create_stolen(dev, size); - if (obj == NULL) - obj = i915_gem_alloc_object(dev, size); + obj = i915_gem_alloc_object(dev, size); if (obj == NULL) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 619dad1b2386..dcc6a88c560e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -516,17 +516,17 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct page *page_table; if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) - continue; + break; pd = ppgtt->pdp.page_directory[pdpe]; if (WARN_ON(!pd->page_table[pde])) - continue; + break; pt = pd->page_table[pde]; if (WARN_ON(!pt->page)) - continue; + break; page_table = pt->page; @@ -2546,6 +2546,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct i915_vma *vma; + bool flush; i915_check_and_clear_faults(dev); @@ -2555,16 +2557,23 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total, true); + /* Cache flush objects bound into GGTT and rebind them. */ + vm = &dev_priv->gtt.base; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, - &dev_priv->gtt.base); - if (!vma) - continue; + flush = false; + list_for_each_entry(vma, &obj->vma_list, vma_link) { + if (vma->vm != vm) + continue; - i915_gem_clflush_object(obj, obj->pin_display); - WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE)); - } + WARN_ON(i915_vma_bind(vma, obj->cache_level, + PIN_UPDATE)); + flush = true; + } + + if (flush) + i915_gem_clflush_object(obj, obj->pin_display); + } if (INTEL_INFO(dev)->gen >= 8) { if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 633bd1fcab69..d61e74a08f82 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,8 +183,18 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { + /* Since the swizzling may vary within an + * object, we have no idea what the swizzling + * is for any page in particular. Thus we + * cannot migrate tiled pages using the GPU, + * nor can we tell userspace what the exact + * swizzling is for any object. + */ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } } if (dcc == 0xffffffff) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f5edb3504167..2030f602cbf8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3491,6 +3491,7 @@ enum skl_disp_power_wells { #define BLM_POLARITY_PNV (1 << 0) /* pnv only */ #define BLC_HIST_CTL (dev_priv->info.display_mmio_offset + 0x61260) +#define BLM_HISTOGRAM_ENABLE (1 << 31) /* New registers for PCH-split platforms. Safe where new bits show up, the * register layout machtes with gen4 BLC_PWM_CTL[12]. */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dcb1d25d6f05..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4854,6 +4854,9 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) struct intel_plane *intel_plane; int pipe = intel_crtc->pipe; + if (!intel_crtc->active) + return; + intel_crtc_wait_for_pending_flips(crtc); intel_pre_disable_primary(crtc); @@ -7887,7 +7890,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, int pipe = pipe_config->cpu_transcoder; enum dpio_channel port = vlv_pipe_to_channel(pipe); intel_clock_t clock; - u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2; + u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; int refclk = 100000; mutex_lock(&dev_priv->sb_lock); @@ -7895,10 +7898,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); + pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); mutex_unlock(&dev_priv->sb_lock); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; - clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff); + clock.m2 = (pll_dw0 & 0xff) << 22; + if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) + clock.m2 |= pll_dw2 & 0x3fffff; clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; @@ -13303,6 +13309,16 @@ intel_check_primary_plane(struct drm_plane *plane, intel_crtc->atomic.wait_vblank = true; } + /* + * FIXME: Actually if we will still have any other plane enabled + * on the pipe we could let IPS enabled still, but for + * now lets consider that when we make primary invisible + * by setting DSPCNTR to 0 on update_primary_plane function + * IPS needs to be disable. + */ + if (!state->visible || !fb) + intel_crtc->atomic.disable_ips = true; + intel_crtc->atomic.fb_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); @@ -13400,6 +13416,9 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc) if (intel_crtc->atomic.disable_fbc) intel_fbc_disable(dev); + if (intel_crtc->atomic.disable_ips) + hsw_disable_ips(intel_crtc); + if (intel_crtc->atomic.pre_disable_primary) intel_pre_disable_primary(crtc); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 76afc62373d7..6e8faa253792 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1140,6 +1140,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock) static void hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config, int link_bw) { + memset(&pipe_config->dpll_hw_state, 0, + sizeof(pipe_config->dpll_hw_state)); + switch (link_bw) { case DP_LINK_BW_1_62: pipe_config->ddi_pll_sel = PORT_CLK_SEL_LCPLL_810; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2afb31a46275..105928382e21 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -485,6 +485,7 @@ struct intel_crtc_atomic_commit { /* Sleepable operations to perform before commit */ bool wait_for_flips; bool disable_fbc; + bool disable_ips; bool pre_disable_primary; bool update_wm; unsigned disabled_planes; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 7d83527f95f7..55aad2322e10 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -907,6 +907,14 @@ static void i9xx_enable_backlight(struct intel_connector *connector) /* XXX: combine this into above write? */ intel_panel_actually_set_backlight(connector, panel->backlight.level); + + /* + * Needed to enable backlight on some 855gm models. BLC_HIST_CTL is + * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 + * that has backlight. + */ + if (IS_GEN2(dev)) + I915_WRITE(BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } static void i965_enable_backlight(struct intel_connector *connector) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f2daad8c3d96..7841970de48d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -285,7 +285,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) if (wait) { if (!wait_for_completion_timeout(&engine->compl, - msecs_to_jiffies(1))) { + msecs_to_jiffies(100))) { dev_err(dmm->dev, "timed out waiting for done\n"); ret = -ETIMEDOUT; } diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index ae2df41f216f..12081e61d45a 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -177,7 +177,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p); int omap_framebuffer_pin(struct drm_framebuffer *fb); -int omap_framebuffer_unpin(struct drm_framebuffer *fb); +void omap_framebuffer_unpin(struct drm_framebuffer *fb); void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, struct omap_drm_window *win, struct omap_overlay_info *info); struct drm_connector *omap_framebuffer_get_next_connector( @@ -211,7 +211,7 @@ void omap_gem_dma_sync(struct drm_gem_object *obj, enum dma_data_direction dir); int omap_gem_get_paddr(struct drm_gem_object *obj, dma_addr_t *paddr, bool remap); -int omap_gem_put_paddr(struct drm_gem_object *obj); +void omap_gem_put_paddr(struct drm_gem_object *obj); int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages, bool remap); int omap_gem_put_pages(struct drm_gem_object *obj); @@ -236,7 +236,7 @@ static inline int align_pitch(int pitch, int width, int bpp) /* PVR needs alignment to 8 pixels.. right now that is the most * restrictive stride requirement.. */ - return ALIGN(pitch, 8 * bytespp); + return roundup(pitch, 8 * bytespp); } /* map crtc to vblank mask */ diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 0b967e76df1a..51b1219af87f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -287,10 +287,10 @@ fail: } /* unpin, no longer being scanned out: */ -int omap_framebuffer_unpin(struct drm_framebuffer *fb) +void omap_framebuffer_unpin(struct drm_framebuffer *fb) { struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); - int ret, i, n = drm_format_num_planes(fb->pixel_format); + int i, n = drm_format_num_planes(fb->pixel_format); mutex_lock(&omap_fb->lock); @@ -298,24 +298,16 @@ int omap_framebuffer_unpin(struct drm_framebuffer *fb) if (omap_fb->pin_count > 0) { mutex_unlock(&omap_fb->lock); - return 0; + return; } for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; - ret = omap_gem_put_paddr(plane->bo); - if (ret) - goto fail; + omap_gem_put_paddr(plane->bo); plane->paddr = 0; } mutex_unlock(&omap_fb->lock); - - return 0; - -fail: - mutex_unlock(&omap_fb->lock); - return ret; } struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p) diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 23b5a84389e3..720d16bce7e8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -135,7 +135,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, fbdev->ywrap_enabled = priv->has_dmm && ywrap_enabled; if (fbdev->ywrap_enabled) { /* need to align pitch to page size if using DMM scrolling */ - mode_cmd.pitches[0] = ALIGN(mode_cmd.pitches[0], PAGE_SIZE); + mode_cmd.pitches[0] = PAGE_ALIGN(mode_cmd.pitches[0]); } /* allocate backing bo */ diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 2ab77801cf5f..7ed08fdc4c42 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -808,10 +808,10 @@ fail: /* Release physical address, when DMA is no longer being performed.. this * could potentially unpin and unmap buffers from TILER */ -int omap_gem_put_paddr(struct drm_gem_object *obj) +void omap_gem_put_paddr(struct drm_gem_object *obj) { struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; + int ret; mutex_lock(&obj->dev->struct_mutex); if (omap_obj->paddr_cnt > 0) { @@ -821,7 +821,6 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) if (ret) { dev_err(obj->dev->dev, "could not unpin pages: %d\n", ret); - goto fail; } ret = tiler_release(omap_obj->block); if (ret) { @@ -832,9 +831,8 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) omap_obj->block = NULL; } } -fail: + mutex_unlock(&obj->dev->struct_mutex); - return ret; } /* Get rotated scanout address (only valid if already pinned), at the @@ -1378,11 +1376,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL); if (!omap_obj) - goto fail; - - spin_lock(&priv->list_lock); - list_add(&omap_obj->mm_list, &priv->obj_list); - spin_unlock(&priv->list_lock); + return NULL; obj = &omap_obj->base; @@ -1392,11 +1386,19 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size, &omap_obj->paddr, GFP_KERNEL); - if (omap_obj->vaddr) - flags |= OMAP_BO_DMA; + if (!omap_obj->vaddr) { + kfree(omap_obj); + + return NULL; + } + flags |= OMAP_BO_DMA; } + spin_lock(&priv->list_lock); + list_add(&omap_obj->mm_list, &priv->obj_list); + spin_unlock(&priv->list_lock); + omap_obj->flags = flags; if (flags & OMAP_BO_TILED) { diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index cfa8276c4deb..098904696a5c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -17,6 +17,7 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_plane_helper.h> @@ -153,9 +154,34 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, dispc_ovl_enable(omap_plane->id, false); } +static int omap_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + + if (!state->crtc) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (state->crtc_x < 0 || state->crtc_y < 0) + return -EINVAL; + + if (state->crtc_x + state->crtc_w > crtc_state->adjusted_mode.hdisplay) + return -EINVAL; + + if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) + return -EINVAL; + + return 0; +} + static const struct drm_plane_helper_funcs omap_plane_helper_funcs = { .prepare_fb = omap_plane_prepare_fb, .cleanup_fb = omap_plane_cleanup_fb, + .atomic_check = omap_plane_atomic_check, .atomic_update = omap_plane_atomic_update, .atomic_disable = omap_plane_atomic_disable, }; diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b0688b0c8908..248953d2fdb7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, WDOORBELL32(ring->doorbell_index, ring->wptr); } +static void cik_compute_stop(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + u32 j, tmp; + + cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0); + /* Disable wptr polling. */ + tmp = RREG32(CP_PQ_WPTR_POLL_CNTL); + tmp &= ~WPTR_POLL_EN; + WREG32(CP_PQ_WPTR_POLL_CNTL, tmp); + /* Disable HQD. */ + if (RREG32(CP_HQD_ACTIVE) & 1) { + WREG32(CP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < rdev->usec_timeout; j++) { + if (!(RREG32(CP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(CP_HQD_DEQUEUE_REQUEST, 0); + WREG32(CP_HQD_PQ_RPTR, 0); + WREG32(CP_HQD_PQ_WPTR, 0); + } + cik_srbm_select(rdev, 0, 0, 0, 0); +} + /** * cik_cp_compute_enable - enable/disable the compute CP MEs * @@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) if (enable) WREG32(CP_MEC_CNTL, 0); else { + /* + * To make hibernation reliable we need to clear compute ring + * configuration before halting the compute ring. + */ + mutex_lock(&rdev->srbm_mutex); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]); + cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + mutex_unlock(&rdev->srbm_mutex); + WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; @@ -7930,23 +7964,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7956,23 +7994,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7982,23 +8024,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8008,23 +8054,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8034,23 +8084,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8060,23 +8114,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8096,88 +8154,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index f86eb54e7763..d16f2eebd95e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) } rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; + + /* FIXME use something else than big hammer but after few days can not + * seem to find good combination so reset SDMA blocks as it seems we + * do not shut them down properly. This fix hibernation and does not + * affect suspend to ram. + */ + WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); + (void)RREG32(SRBM_SOFT_RESET); + udelay(50); + WREG32(SRBM_SOFT_RESET, 0); + (void)RREG32(SRBM_SOFT_RESET); } /** diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3a6d483a2c36..0acde1949c18 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4924,7 +4924,7 @@ restart_ih: return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4942,23 +4942,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4968,23 +4972,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4994,23 +5002,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5020,23 +5032,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5046,23 +5062,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5072,23 +5092,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5108,88 +5132,100 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5199,46 +5235,52 @@ restart_ih: case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8e5aeeb058a5..158872eb78e4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2162,18 +2162,20 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } - ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + if (rdev->family == CHIP_ARUBA) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - if (!r) - r = vce_v1_0_init(rdev); - else if (r != -ENOENT) - DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + if (!r) + r = vce_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + } r = radeon_ib_pool_init(rdev); if (r) { @@ -2396,7 +2398,8 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); - radeon_vce_fini(rdev); + if (rdev->family == CHIP_ARUBA) + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 35dafd77a639..4ea5b10ff5f4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4086,23 +4086,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4112,23 +4116,27 @@ restart_ih: case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4148,46 +4156,53 @@ restart_ih: case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4197,18 +4212,22 @@ restart_ih: case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 09e3f39925fa..98f9adaccc3d 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -2483,7 +2483,7 @@ int r600_cp_dispatch_texture(struct drm_device *dev, struct drm_buf *buf; u32 *buffer; const u8 __user *data; - int size, pass_size; + unsigned int size, pass_size; u64 src_offset, dst_offset; if (!radeon_check_offset(dev_priv, tex->offset)) { diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index c89215275053..fa719c53449b 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector, dig = radeon_encoder->enc_priv; if (status == connector_status_connected) { - struct radeon_connector *radeon_connector; - int sink_type; - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { radeon_encoder->audio = NULL; return; } - radeon_connector = to_radeon_connector(connector); - sink_type = radeon_dp_getsinktype(radeon_connector); + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && - sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) - radeon_encoder->audio = rdev->audio.dp_funcs; - else + if (radeon_dp_getsinktype(radeon_connector) == + CONNECTOR_OBJECT_ID_DISPLAYPORT) + radeon_encoder->audio = rdev->audio.dp_funcs; + else + radeon_encoder->audio = rdev->audio.hdmi_funcs; + } else { radeon_encoder->audio = rdev->audio.hdmi_funcs; + } dig->afmt->pin = radeon_audio_get_pin(connector->encoder); radeon_audio_enable(rdev, dig->afmt->pin, 0xf); diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97..afaf346bd50e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -91,15 +91,34 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(radeon_crtc->cursor_addr)); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { + if (rdev->family >= CHIP_RV770) { + if (radeon_crtc->crtc_id) + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + else + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + } + + WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { + /* offset is from DISP(2)_BASE_ADDRESS */ + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); + switch (radeon_crtc->crtc_id) { case 0: WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); @@ -205,8 +224,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,53 +247,6 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) -{ - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; - - if (ASIC_IS_DCE4(rdev)) { - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); - } else if (ASIC_IS_AVIVO(rdev)) { - if (rdev->family >= CHIP_RV770) { - if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); - else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); - } - WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); - } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; - /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); - } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; -} - int radeon_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, @@ -283,7 +256,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +280,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +315,7 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +327,7 @@ unpin: radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +345,6 @@ unpin: void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +352,7 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2593b1168bd6..d8319dae8358 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1080,6 +1080,22 @@ static bool radeon_check_pot_argument(int arg) } /** + * Determine a sensible default GART size according to ASIC family. + * + * @family ASIC family name + */ +static int radeon_gart_size_auto(enum radeon_family family) +{ + /* default to a larger gart size on newer asics */ + if (family >= CHIP_TAHITI) + return 2048; + else if (family >= CHIP_RV770) + return 1024; + else + return 512; +} + +/** * radeon_check_arguments - validate module params * * @rdev: radeon_device pointer @@ -1097,27 +1113,17 @@ static void radeon_check_arguments(struct radeon_device *rdev) } if (radeon_gart_size == -1) { - /* default to a larger gart size on newer asics */ - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20; @@ -1572,11 +1578,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1639,6 +1655,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1678,6 +1695,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? + 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b953..013ec7106e55 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, true); + + r = reservation_object_test_signaled_rcu(robj->tbo.resv, true); + if (r == 0) + r = -EBUSY; + else + r = 0; + + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(rdev, r); return r; } @@ -471,6 +476,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 6de5459316b5..07909d817381 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -343,7 +343,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index edafd3c2b170..06ac59fe332a 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) return 0; if (gtt && gtt->userptr) { - ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 3662157c2b15..48d97c040f49 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -493,38 +493,35 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } if (bo_va->it.start || bo_va->it.last) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { - /* add a clone of the bo_va to clear the old address */ - struct radeon_bo_va *tmp; - spin_unlock(&vm->status_lock); - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (!tmp) { - mutex_unlock(&vm->mutex); - r = -ENOMEM; - goto error_unreserve; - } - tmp->it.start = bo_va->it.start; - tmp->it.last = bo_va->it.last; - tmp->vm = vm; - tmp->bo = radeon_bo_ref(bo_va->bo); - spin_lock(&vm->status_lock); - list_add(&tmp->vm_status, &vm->freed); + /* add a clone of the bo_va to clear the old address */ + struct radeon_bo_va *tmp; + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + r = -ENOMEM; + goto error_unreserve; } - spin_unlock(&vm->status_lock); + tmp->it.start = bo_va->it.start; + tmp->it.last = bo_va->it.last; + tmp->vm = vm; + tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); + spin_lock(&vm->status_lock); bo_va->it.start = 0; bo_va->it.last = 0; + list_del_init(&bo_va->vm_status); + list_add(&tmp->vm_status, &vm->freed); + spin_unlock(&vm->status_lock); } if (soffset || eoffset) { + spin_lock(&vm->status_lock); bo_va->it.start = soffset; bo_va->it.last = eoffset - 1; - interval_tree_insert(&bo_va->it, &vm->va); - spin_lock(&vm->status_lock); list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + interval_tree_insert(&bo_va->it, &vm->va); } bo_va->flags = flags; @@ -1129,12 +1126,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, interval_tree_remove(&bo_va->it, &vm->va); spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { + list_del(&bo_va->vm_status); + if (bo_va->it.start || bo_va->it.last) { bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { radeon_fence_unref(&bo_va->last_pt_update); - list_del(&bo_va->vm_status); kfree(bo_va); } spin_unlock(&vm->status_lock); @@ -1158,7 +1155,8 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, list_for_each_entry(bo_va, &bo->va, bo_list) { spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) + if (list_empty(&bo_va->vm_status) && + (bo_va->it.start || bo_va->it.last)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); spin_unlock(&bo_va->vm->status_lock); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 26388b5dd6ed..07037e32dea3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 2a808822af21..37c16afe007a 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -777,7 +777,7 @@ static int __init i8k_init_hwmon(void) if (err >= 0) i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2; - i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell-smm", + i8k_hwmon_dev = hwmon_device_register_with_groups(NULL, "dell_smm", NULL, i8k_groups); if (IS_ERR(i8k_hwmon_dev)) { err = PTR_ERR(i8k_hwmon_dev); diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c index d219c06a857b..972444a14cca 100644 --- a/drivers/hwmon/mcp3021.c +++ b/drivers/hwmon/mcp3021.c @@ -31,14 +31,11 @@ /* output format */ #define MCP3021_SAR_SHIFT 2 #define MCP3021_SAR_MASK 0x3ff - #define MCP3021_OUTPUT_RES 10 /* 10-bit resolution */ -#define MCP3021_OUTPUT_SCALE 4 #define MCP3221_SAR_SHIFT 0 #define MCP3221_SAR_MASK 0xfff #define MCP3221_OUTPUT_RES 12 /* 12-bit resolution */ -#define MCP3221_OUTPUT_SCALE 1 enum chips { mcp3021, @@ -54,7 +51,6 @@ struct mcp3021_data { u16 sar_shift; u16 sar_mask; u8 output_res; - u8 output_scale; }; static int mcp3021_read16(struct i2c_client *client) @@ -84,13 +80,7 @@ static int mcp3021_read16(struct i2c_client *client) static inline u16 volts_from_reg(struct mcp3021_data *data, u16 val) { - if (val == 0) - return 0; - - val = val * data->output_scale - data->output_scale / 2; - - return val * DIV_ROUND_CLOSEST(data->vdd, - (1 << data->output_res) * data->output_scale); + return DIV_ROUND_CLOSEST(data->vdd * val, 1 << data->output_res); } static ssize_t show_in_input(struct device *dev, struct device_attribute *attr, @@ -132,14 +122,12 @@ static int mcp3021_probe(struct i2c_client *client, data->sar_shift = MCP3021_SAR_SHIFT; data->sar_mask = MCP3021_SAR_MASK; data->output_res = MCP3021_OUTPUT_RES; - data->output_scale = MCP3021_OUTPUT_SCALE; break; case mcp3221: data->sar_shift = MCP3221_SAR_SHIFT; data->sar_mask = MCP3221_SAR_MASK; data->output_res = MCP3221_OUTPUT_RES; - data->output_scale = MCP3221_OUTPUT_SCALE; break; } diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 55765790907b..28fcb2e246d5 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -547,7 +547,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 9 && index < 18 && (reg & 0x0c) != 0x04 && (reg & 0x0c) != 0x08) /* RD2 */ return 0; - if (index >= 18 && index < 27 && (reg & 0x30) != 0x10) /* RD3 */ + if (index >= 18 && index < 27 && (reg & 0x30) != 0x20) /* RD3 */ return 0; if (index >= 27 && index < 35) /* local */ return attr->mode; diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index b10353b31806..697007afb99c 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1937,27 +1937,11 @@ static inline void w83627ehf_init_device(struct w83627ehf_data *data, static void w82627ehf_swap_tempreg(struct w83627ehf_data *data, int r1, int r2) { - u16 tmp; - - tmp = data->temp_src[r1]; - data->temp_src[r1] = data->temp_src[r2]; - data->temp_src[r2] = tmp; - - tmp = data->reg_temp[r1]; - data->reg_temp[r1] = data->reg_temp[r2]; - data->reg_temp[r2] = tmp; - - tmp = data->reg_temp_over[r1]; - data->reg_temp_over[r1] = data->reg_temp_over[r2]; - data->reg_temp_over[r2] = tmp; - - tmp = data->reg_temp_hyst[r1]; - data->reg_temp_hyst[r1] = data->reg_temp_hyst[r2]; - data->reg_temp_hyst[r2] = tmp; - - tmp = data->reg_temp_config[r1]; - data->reg_temp_config[r1] = data->reg_temp_config[r2]; - data->reg_temp_config[r2] = tmp; + swap(data->temp_src[r1], data->temp_src[r2]); + swap(data->reg_temp[r1], data->reg_temp[r2]); + swap(data->reg_temp_over[r1], data->reg_temp_over[r2]); + swap(data->reg_temp_hyst[r1], data->reg_temp_hyst[r2]); + swap(data->reg_temp_config[r1], data->reg_temp_config[r2]); } static void diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 4068db4d9580..0a8bce726b4b 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -289,10 +289,7 @@ struct w83792d_data { u8 temp1[3]; /* current, over, thyst */ u8 temp_add[2][6]; /* Register value */ u8 fan_div[7]; /* Register encoding, shifted right */ - u8 pwm[7]; /* - * We only consider the first 3 set of pwm, - * although 792 chip has 7 set of pwm. - */ + u8 pwm[7]; /* The 7 PWM outputs */ u8 pwmenable[3]; u32 alarms; /* realtime status register encoding,combined */ u8 chassis; /* Chassis status */ @@ -1075,6 +1072,10 @@ static DEVICE_ATTR(intrusion0_alarm, S_IRUGO | S_IWUSR, static SENSOR_DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 0); static SENSOR_DEVICE_ATTR(pwm2, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 1); static SENSOR_DEVICE_ATTR(pwm3, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 2); +static SENSOR_DEVICE_ATTR(pwm4, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 3); +static SENSOR_DEVICE_ATTR(pwm5, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 4); +static SENSOR_DEVICE_ATTR(pwm6, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 5); +static SENSOR_DEVICE_ATTR(pwm7, S_IWUSR | S_IRUGO, show_pwm, store_pwm, 6); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO, show_pwmenable, store_pwmenable, 1); static SENSOR_DEVICE_ATTR(pwm2_enable, S_IWUSR | S_IRUGO, @@ -1087,6 +1088,14 @@ static SENSOR_DEVICE_ATTR(pwm2_mode, S_IWUSR | S_IRUGO, show_pwm_mode, store_pwm_mode, 1); static SENSOR_DEVICE_ATTR(pwm3_mode, S_IWUSR | S_IRUGO, show_pwm_mode, store_pwm_mode, 2); +static SENSOR_DEVICE_ATTR(pwm4_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 3); +static SENSOR_DEVICE_ATTR(pwm5_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 4); +static SENSOR_DEVICE_ATTR(pwm6_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 5); +static SENSOR_DEVICE_ATTR(pwm7_mode, S_IWUSR | S_IRUGO, + show_pwm_mode, store_pwm_mode, 6); static SENSOR_DEVICE_ATTR(tolerance1, S_IWUSR | S_IRUGO, show_tolerance, store_tolerance, 1); static SENSOR_DEVICE_ATTR(tolerance2, S_IWUSR | S_IRUGO, @@ -1177,30 +1186,38 @@ static SENSOR_DEVICE_ATTR(fan6_div, S_IWUSR | S_IRUGO, static SENSOR_DEVICE_ATTR(fan7_div, S_IWUSR | S_IRUGO, show_fan_div, store_fan_div, 7); -static struct attribute *w83792d_attributes_fan[4][5] = { +static struct attribute *w83792d_attributes_fan[4][7] = { { &sensor_dev_attr_fan4_input.dev_attr.attr, &sensor_dev_attr_fan4_min.dev_attr.attr, &sensor_dev_attr_fan4_div.dev_attr.attr, &sensor_dev_attr_fan4_alarm.dev_attr.attr, + &sensor_dev_attr_pwm4.dev_attr.attr, + &sensor_dev_attr_pwm4_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan5_input.dev_attr.attr, &sensor_dev_attr_fan5_min.dev_attr.attr, &sensor_dev_attr_fan5_div.dev_attr.attr, &sensor_dev_attr_fan5_alarm.dev_attr.attr, + &sensor_dev_attr_pwm5.dev_attr.attr, + &sensor_dev_attr_pwm5_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan6_input.dev_attr.attr, &sensor_dev_attr_fan6_min.dev_attr.attr, &sensor_dev_attr_fan6_div.dev_attr.attr, &sensor_dev_attr_fan6_alarm.dev_attr.attr, + &sensor_dev_attr_pwm6.dev_attr.attr, + &sensor_dev_attr_pwm6_mode.dev_attr.attr, NULL }, { &sensor_dev_attr_fan7_input.dev_attr.attr, &sensor_dev_attr_fan7_min.dev_attr.attr, &sensor_dev_attr_fan7_div.dev_attr.attr, &sensor_dev_attr_fan7_alarm.dev_attr.attr, + &sensor_dev_attr_pwm7.dev_attr.attr, + &sensor_dev_attr_pwm7_mode.dev_attr.attr, NULL } }; diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig index 3612cb5b30b2..73a401662853 100644 --- a/drivers/hwspinlock/Kconfig +++ b/drivers/hwspinlock/Kconfig @@ -18,6 +18,30 @@ config HWSPINLOCK_OMAP If unsure, say N. +config HWSPINLOCK_QCOM + tristate "Qualcomm Hardware Spinlock device" + depends on ARCH_QCOM + select HWSPINLOCK + select MFD_SYSCON + help + Say y here to support the Qualcomm Hardware Mutex functionality, which + provides a synchronisation mechanism for the various processors on + the SoC. + + If unsure, say N. + +config HWSPINLOCK_SIRF + tristate "SIRF Hardware Spinlock device" + depends on ARCH_SIRF + select HWSPINLOCK + help + Say y here to support the SIRF Hardware Spinlock device, which + provides a synchronisation mechanism for the various processors + on the SoC. + + It's safe to say n here if you're not interested in SIRF hardware + spinlock or just want a bare minimum kernel. + config HSEM_U8500 tristate "STE Hardware Semaphore functionality" depends on ARCH_U8500 diff --git a/drivers/hwspinlock/Makefile b/drivers/hwspinlock/Makefile index 93eb64b66486..6b59cb5a4f3a 100644 --- a/drivers/hwspinlock/Makefile +++ b/drivers/hwspinlock/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_HWSPINLOCK) += hwspinlock_core.o obj-$(CONFIG_HWSPINLOCK_OMAP) += omap_hwspinlock.o +obj-$(CONFIG_HWSPINLOCK_QCOM) += qcom_hwspinlock.o +obj-$(CONFIG_HWSPINLOCK_SIRF) += sirf_hwspinlock.o obj-$(CONFIG_HSEM_U8500) += u8500_hsem.o diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index 461a0d739d75..52f708bcf77f 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c @@ -27,6 +27,7 @@ #include <linux/hwspinlock.h> #include <linux/pm_runtime.h> #include <linux/mutex.h> +#include <linux/of.h> #include "hwspinlock_internal.h" @@ -257,6 +258,84 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) } EXPORT_SYMBOL_GPL(__hwspin_unlock); +/** + * of_hwspin_lock_simple_xlate - translate hwlock_spec to return a lock id + * @bank: the hwspinlock device bank + * @hwlock_spec: hwlock specifier as found in the device tree + * + * This is a simple translation function, suitable for hwspinlock platform + * drivers that only has a lock specifier length of 1. + * + * Returns a relative index of the lock within a specified bank on success, + * or -EINVAL on invalid specifier cell count. + */ +static inline int +of_hwspin_lock_simple_xlate(const struct of_phandle_args *hwlock_spec) +{ + if (WARN_ON(hwlock_spec->args_count != 1)) + return -EINVAL; + + return hwlock_spec->args[0]; +} + +/** + * of_hwspin_lock_get_id() - get lock id for an OF phandle-based specific lock + * @np: device node from which to request the specific hwlock + * @index: index of the hwlock in the list of values + * + * This function provides a means for DT users of the hwspinlock module to + * get the global lock id of a specific hwspinlock using the phandle of the + * hwspinlock device, so that it can be requested using the normal + * hwspin_lock_request_specific() API. + * + * Returns the global lock id number on success, -EPROBE_DEFER if the hwspinlock + * device is not yet registered, -EINVAL on invalid args specifier value or an + * appropriate error as returned from the OF parsing of the DT client node. + */ +int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + struct of_phandle_args args; + struct hwspinlock *hwlock; + struct radix_tree_iter iter; + void **slot; + int id; + int ret; + + ret = of_parse_phandle_with_args(np, "hwlocks", "#hwlock-cells", index, + &args); + if (ret) + return ret; + + /* Find the hwspinlock device: we need its base_id */ + ret = -EPROBE_DEFER; + rcu_read_lock(); + radix_tree_for_each_slot(slot, &hwspinlock_tree, &iter, 0) { + hwlock = radix_tree_deref_slot(slot); + if (unlikely(!hwlock)) + continue; + + if (hwlock->bank->dev->of_node == args.np) { + ret = 0; + break; + } + } + rcu_read_unlock(); + if (ret < 0) + goto out; + + id = of_hwspin_lock_simple_xlate(&args); + if (id < 0 || id >= hwlock->bank->num_locks) { + ret = -EINVAL; + goto out; + } + id += hwlock->bank->base_id; + +out: + of_node_put(args.np); + return ret ? ret : id; +} +EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id); + static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id) { struct hwspinlock *tmp; diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index 47a275c6ece1..ad2f8cac8487 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -1,7 +1,7 @@ /* * OMAP hardware spinlock driver * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2010-2015 Texas Instruments Incorporated - http://www.ti.com * * Contact: Simon Que <sque@ti.com> * Hari Kanigeri <h-kanigeri2@ti.com> @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/hwspinlock.h> +#include <linux/of.h> #include <linux/platform_device.h> #include "hwspinlock_internal.h" @@ -80,14 +81,16 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = { static int omap_hwspinlock_probe(struct platform_device *pdev) { - struct hwspinlock_pdata *pdata = pdev->dev.platform_data; + struct device_node *node = pdev->dev.of_node; struct hwspinlock_device *bank; struct hwspinlock *hwlock; struct resource *res; void __iomem *io_base; int num_locks, i, ret; + /* Only a single hwspinlock block device is supported */ + int base_id = 0; - if (!pdata) + if (!node) return -ENODEV; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -141,7 +144,7 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, - pdata->base_id, num_locks); + base_id, num_locks); if (ret) goto reg_fail; @@ -174,11 +177,18 @@ static int omap_hwspinlock_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id omap_hwspinlock_of_match[] = { + { .compatible = "ti,omap4-hwspinlock", }, + { /* end */ }, +}; +MODULE_DEVICE_TABLE(of, omap_hwspinlock_of_match); + static struct platform_driver omap_hwspinlock_driver = { .probe = omap_hwspinlock_probe, .remove = omap_hwspinlock_remove, .driver = { .name = "omap_hwspinlock", + .of_match_table = of_match_ptr(omap_hwspinlock_of_match), }, }; diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c new file mode 100644 index 000000000000..c752447fbac7 --- /dev/null +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, Sony Mobile Communications AB + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/hwspinlock.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/regmap.h> + +#include "hwspinlock_internal.h" + +#define QCOM_MUTEX_APPS_PROC_ID 1 +#define QCOM_MUTEX_NUM_LOCKS 32 + +static int qcom_hwspinlock_trylock(struct hwspinlock *lock) +{ + struct regmap_field *field = lock->priv; + u32 lock_owner; + int ret; + + ret = regmap_field_write(field, QCOM_MUTEX_APPS_PROC_ID); + if (ret) + return ret; + + ret = regmap_field_read(field, &lock_owner); + if (ret) + return ret; + + return lock_owner == QCOM_MUTEX_APPS_PROC_ID; +} + +static void qcom_hwspinlock_unlock(struct hwspinlock *lock) +{ + struct regmap_field *field = lock->priv; + u32 lock_owner; + int ret; + + ret = regmap_field_read(field, &lock_owner); + if (ret) { + pr_err("%s: unable to query spinlock owner\n", __func__); + return; + } + + if (lock_owner != QCOM_MUTEX_APPS_PROC_ID) { + pr_err("%s: spinlock not owned by us (actual owner is %d)\n", + __func__, lock_owner); + } + + ret = regmap_field_write(field, 0); + if (ret) + pr_err("%s: failed to unlock spinlock\n", __func__); +} + +static const struct hwspinlock_ops qcom_hwspinlock_ops = { + .trylock = qcom_hwspinlock_trylock, + .unlock = qcom_hwspinlock_unlock, +}; + +static const struct of_device_id qcom_hwspinlock_of_match[] = { + { .compatible = "qcom,sfpb-mutex" }, + { .compatible = "qcom,tcsr-mutex" }, + { } +}; +MODULE_DEVICE_TABLE(of, qcom_hwspinlock_of_match); + +static int qcom_hwspinlock_probe(struct platform_device *pdev) +{ + struct hwspinlock_device *bank; + struct device_node *syscon; + struct reg_field field; + struct regmap *regmap; + size_t array_size; + u32 stride; + u32 base; + int ret; + int i; + + syscon = of_parse_phandle(pdev->dev.of_node, "syscon", 0); + if (!syscon) { + dev_err(&pdev->dev, "no syscon property\n"); + return -ENODEV; + } + + regmap = syscon_node_to_regmap(syscon); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 1, &base); + if (ret < 0) { + dev_err(&pdev->dev, "no offset in syscon\n"); + return -EINVAL; + } + + ret = of_property_read_u32_index(pdev->dev.of_node, "syscon", 2, &stride); + if (ret < 0) { + dev_err(&pdev->dev, "no stride syscon\n"); + return -EINVAL; + } + + array_size = QCOM_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock); + bank = devm_kzalloc(&pdev->dev, sizeof(*bank) + array_size, GFP_KERNEL); + if (!bank) + return -ENOMEM; + + platform_set_drvdata(pdev, bank); + + for (i = 0; i < QCOM_MUTEX_NUM_LOCKS; i++) { + field.reg = base + i * stride; + field.lsb = 0; + field.msb = 31; + + bank->lock[i].priv = devm_regmap_field_alloc(&pdev->dev, + regmap, field); + } + + pm_runtime_enable(&pdev->dev); + + ret = hwspin_lock_register(bank, &pdev->dev, &qcom_hwspinlock_ops, + 0, QCOM_MUTEX_NUM_LOCKS); + if (ret) + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static int qcom_hwspinlock_remove(struct platform_device *pdev) +{ + struct hwspinlock_device *bank = platform_get_drvdata(pdev); + int ret; + + ret = hwspin_lock_unregister(bank); + if (ret) { + dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static struct platform_driver qcom_hwspinlock_driver = { + .probe = qcom_hwspinlock_probe, + .remove = qcom_hwspinlock_remove, + .driver = { + .name = "qcom_hwspinlock", + .of_match_table = qcom_hwspinlock_of_match, + }, +}; + +static int __init qcom_hwspinlock_init(void) +{ + return platform_driver_register(&qcom_hwspinlock_driver); +} +/* board init code might need to reserve hwspinlocks for predefined purposes */ +postcore_initcall(qcom_hwspinlock_init); + +static void __exit qcom_hwspinlock_exit(void) +{ + platform_driver_unregister(&qcom_hwspinlock_driver); +} +module_exit(qcom_hwspinlock_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Hardware spinlock driver for Qualcomm SoCs"); diff --git a/drivers/hwspinlock/sirf_hwspinlock.c b/drivers/hwspinlock/sirf_hwspinlock.c new file mode 100644 index 000000000000..16018544d431 --- /dev/null +++ b/drivers/hwspinlock/sirf_hwspinlock.c @@ -0,0 +1,136 @@ +/* + * SIRF hardware spinlock driver + * + * Copyright (c) 2015 Cambridge Silicon Radio Limited, a CSR plc group company. + * + * Licensed under GPLv2. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/hwspinlock.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include "hwspinlock_internal.h" + +struct sirf_hwspinlock { + void __iomem *io_base; + struct hwspinlock_device bank; +}; + +/* Number of Hardware Spinlocks*/ +#define HW_SPINLOCK_NUMBER 30 + +/* Hardware spinlock register offsets */ +#define HW_SPINLOCK_BASE 0x404 +#define HW_SPINLOCK_OFFSET(x) (HW_SPINLOCK_BASE + 0x4 * (x)) + +static int sirf_hwspinlock_trylock(struct hwspinlock *lock) +{ + void __iomem *lock_addr = lock->priv; + + /* attempt to acquire the lock by reading value == 1 from it */ + return !!readl(lock_addr); +} + +static void sirf_hwspinlock_unlock(struct hwspinlock *lock) +{ + void __iomem *lock_addr = lock->priv; + + /* release the lock by writing 0 to it */ + writel(0, lock_addr); +} + +static const struct hwspinlock_ops sirf_hwspinlock_ops = { + .trylock = sirf_hwspinlock_trylock, + .unlock = sirf_hwspinlock_unlock, +}; + +static int sirf_hwspinlock_probe(struct platform_device *pdev) +{ + struct sirf_hwspinlock *hwspin; + struct hwspinlock *hwlock; + int idx, ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + hwspin = devm_kzalloc(&pdev->dev, sizeof(*hwspin) + + sizeof(*hwlock) * HW_SPINLOCK_NUMBER, GFP_KERNEL); + if (!hwspin) + return -ENOMEM; + + /* retrieve io base */ + hwspin->io_base = of_iomap(pdev->dev.of_node, 0); + if (!hwspin->io_base) + return -ENOMEM; + + for (idx = 0; idx < HW_SPINLOCK_NUMBER; idx++) { + hwlock = &hwspin->bank.lock[idx]; + hwlock->priv = hwspin->io_base + HW_SPINLOCK_OFFSET(idx); + } + + platform_set_drvdata(pdev, hwspin); + + pm_runtime_enable(&pdev->dev); + + ret = hwspin_lock_register(&hwspin->bank, &pdev->dev, + &sirf_hwspinlock_ops, 0, + HW_SPINLOCK_NUMBER); + if (ret) + goto reg_failed; + + return 0; + +reg_failed: + pm_runtime_disable(&pdev->dev); + iounmap(hwspin->io_base); + + return ret; +} + +static int sirf_hwspinlock_remove(struct platform_device *pdev) +{ + struct sirf_hwspinlock *hwspin = platform_get_drvdata(pdev); + int ret; + + ret = hwspin_lock_unregister(&hwspin->bank); + if (ret) { + dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); + return ret; + } + + pm_runtime_disable(&pdev->dev); + + iounmap(hwspin->io_base); + + return 0; +} + +static const struct of_device_id sirf_hwpinlock_ids[] = { + { .compatible = "sirf,hwspinlock", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sirf_hwpinlock_ids); + +static struct platform_driver sirf_hwspinlock_driver = { + .probe = sirf_hwspinlock_probe, + .remove = sirf_hwspinlock_remove, + .driver = { + .name = "atlas7_hwspinlock", + .of_match_table = of_match_ptr(sirf_hwpinlock_ids), + }, +}; + +module_platform_driver(sirf_hwspinlock_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("SIRF Hardware spinlock driver"); +MODULE_AUTHOR("Wei Chen <wei.chen@csr.com>"); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 35ac23768ce9..577d58d1f1a1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -633,6 +633,7 @@ config I2C_MPC config I2C_MT65XX tristate "MediaTek I2C adapter" depends on ARCH_MEDIATEK || COMPILE_TEST + depends on HAS_DMA help This selects the MediaTek(R) Integrated Inter Circuit bus driver for MT65xx and MT81xx. diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 19b2d689a5ef..f325663c27c5 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -764,12 +764,15 @@ static int jz4780_i2c_probe(struct platform_device *pdev) if (IS_ERR(i2c->clk)) return PTR_ERR(i2c->clk); - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; - if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", - &clk_freq)) { + ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency", + &clk_freq); + if (ret) { dev_err(&pdev->dev, "clock-frequency not specified in DT"); - return clk_freq; + goto err; } i2c->speed = clk_freq / 1000; @@ -790,10 +793,8 @@ static int jz4780_i2c_probe(struct platform_device *pdev) i2c->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, i2c->irq, jz4780_i2c_irq, 0, dev_name(&pdev->dev), i2c); - if (ret) { - ret = -ENODEV; + if (ret) goto err; - } ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index dcca7076231e..1c9cb65ac4cf 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -419,6 +419,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) rc = i2c_add_adapter(adapter); if (rc) { dev_err(&pdev->dev, "Adapter registeration failed\n"); + mbox_free_channel(ctx->mbox_chan); return rc; } diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 069a41f116dd..e6d4935161e4 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1012,6 +1012,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { + if (client->dev.of_node) + of_node_clear_flag(client->dev.of_node, OF_POPULATED); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -1320,8 +1322,11 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) + for_each_available_child_of_node(adap->dev.of_node, node) { + if (of_node_test_and_set_flag(node, OF_POPULATED)) + continue; of_i2c_register_device(adap, node); + } } static int of_dev_node_match(struct device *dev, void *data) @@ -1853,6 +1858,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (adap == NULL) return NOTIFY_OK; /* not for us */ + if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { + put_device(&adap->dev); + return NOTIFY_OK; + } + client = of_i2c_register_device(adap, rd->dn); put_device(&adap->dev); @@ -1863,6 +1873,10 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } break; case OF_RECONFIG_CHANGE_REMOVE: + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ client = of_find_i2c_device_by_node(rd->dn); if (client == NULL) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 1ca8e32a9592..25422a3a7238 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,7 +277,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index bdd5d3857203..13ef22bd9459 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(d_inode(parent), tmp); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index f3b7a34e10d8..771700963127 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1356,7 +1356,7 @@ sequence_cmd: if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); else if (dump_payload && imm_data) - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1781,7 +1781,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { struct se_cmd *se_cmd = &cmd->se_cmd; - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } } @@ -1954,7 +1954,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, spin_unlock_bh(&cmd->istate_lock); if (ret) { - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); } else { diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 4556cd11288e..82897ca17f32 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -47,7 +47,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric_configfs.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "ib_srpt.h" /* Name of this kernel module. */ @@ -94,7 +93,6 @@ MODULE_PARM_DESC(srpt_service_guid, " instead of using the node_guid of the first HCA."); static struct ib_client srpt_client; -static const struct target_core_fabric_ops srpt_template; static void srpt_release_channel(struct srpt_rdma_ch *ch); static int srpt_queue_status(struct se_cmd *cmd); @@ -1336,12 +1334,12 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) BUG_ON(ch->sess == NULL); - target_put_sess_cmd(ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); goto out; } pr_debug("Aborting cmd with state %d and tag %lld\n", state, - ioctx->tag); + ioctx->cmd.tag); switch (state) { case SRPT_STATE_NEW: @@ -1367,11 +1365,11 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) * not been received in time. */ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; case SRPT_STATE_MGMT_RSP_SENT: srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); break; default: WARN(1, "Unexpected command state (%d)", state); @@ -1389,7 +1387,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) { struct srpt_send_ioctx *ioctx; enum srpt_command_state state; - struct se_cmd *cmd; u32 index; atomic_inc(&ch->sq_wr_avail); @@ -1397,7 +1394,6 @@ static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) index = idx_from_wr_id(wr_id); ioctx = ch->ioctx_ring[index]; state = srpt_get_cmd_state(ioctx); - cmd = &ioctx->cmd; WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && state != SRPT_STATE_MGMT_RSP_SENT @@ -1474,10 +1470,8 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, enum srpt_opcode opcode) { - struct se_cmd *cmd; enum srpt_command_state state; - cmd = &ioctx->cmd; state = srpt_get_cmd_state(ioctx); switch (opcode) { case SRPT_RDMA_READ_LAST: @@ -1681,7 +1675,7 @@ static int srpt_check_stop_free(struct se_cmd *cmd) struct srpt_send_ioctx *ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); - return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + return target_put_sess_cmd(&ioctx->cmd); } /** @@ -1703,7 +1697,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch, srp_cmd = recv_ioctx->ioctx.buf; cmd = &send_ioctx->cmd; - send_ioctx->tag = srp_cmd->tag; + cmd->tag = srp_cmd->tag; switch (srp_cmd->task_attr) { case SRP_CMD_SIMPLE_Q: @@ -1774,7 +1768,7 @@ static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) for (i = 0; i < ch->rq_size; ++i) { target = ch->ioctx_ring[i]; if (target->cmd.se_lun == ioctx->cmd.se_lun && - target->tag == tag && + target->cmd.tag == tag && srpt_get_cmd_state(target) != SRPT_STATE_DONE) { ret = 0; /* now let the target core abort &target->cmd; */ @@ -1833,7 +1827,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); - send_ioctx->tag = srp_tsk->tag; + send_ioctx->cmd.tag = srp_tsk->tag; tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); if (tcm_tmr < 0) { send_ioctx->cmd.se_tmr_req->response = @@ -2180,12 +2174,9 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) */ static void __srpt_close_ch(struct srpt_rdma_ch *ch) { - struct srpt_device *sdev; enum rdma_ch_state prev_state; unsigned long flags; - sdev = ch->sport->sdev; - spin_lock_irqsave(&ch->spinlock, flags); prev_state = ch->state; switch (prev_state) { @@ -2983,7 +2974,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) case CH_DRAINING: case CH_RELEASING: pr_debug("cmd with tag %lld: channel disconnecting\n", - ioctx->tag); + ioctx->cmd.tag); srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); ret = -EINVAL; goto out; @@ -3058,27 +3049,27 @@ static void srpt_queue_response(struct se_cmd *cmd) ret = srpt_xfer_data(ch, ioctx); if (ret) { pr_err("xfer_data failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); return; } } if (state != SRPT_STATE_MGMT) - resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag, + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->cmd.tag, cmd->scsi_status); else { srp_tm_status = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, - ioctx->tag); + ioctx->cmd.tag); } ret = srpt_post_send(ch, ioctx, resp_len); if (ret) { pr_err("sending cmd response failed for tag %llu\n", - ioctx->tag); + ioctx->cmd.tag); srpt_unmap_sg_to_ib_sge(ch, ioctx); srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd); + target_put_sess_cmd(&ioctx->cmd); } } @@ -3398,11 +3389,6 @@ static char *srpt_get_fabric_name(void) return "srpt"; } -static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - return SCSI_TRANSPORTID_PROTOCOLID_SRP; -} - static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) { struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); @@ -3415,69 +3401,6 @@ static u16 srpt_get_tag(struct se_portal_group *tpg) return 1; } -static u32 srpt_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, unsigned char *buf) -{ - struct srpt_node_acl *nacl; - struct spc_rdma_transport_id *tr_id; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - tr_id = (void *)buf; - tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP; - memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id)); - return sizeof(*tr_id); -} - -static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - return sizeof(struct spc_rdma_transport_id); -} - -static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct spc_rdma_transport_id *tr_id; - - *port_nexus_ptr = NULL; - *out_tid_len = sizeof(struct spc_rdma_transport_id); - tr_id = (void *)buf; - return (char *)tr_id->i_port_id; -} - -static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct srpt_node_acl *nacl; - - nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct srpt_node_acl\n"); - return NULL; - } - - return &nacl->nacl; -} - -static void srpt_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct srpt_node_acl *nacl; - - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - kfree(nacl); -} - static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -3551,14 +3474,6 @@ static void srpt_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 srpt_get_task_tag(struct se_cmd *se_cmd) -{ - struct srpt_send_ioctx *ioctx; - - ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); - return ioctx->tag; -} - /* Note: only used from inside debug printk's by the TCM core. */ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) { @@ -3601,40 +3516,19 @@ out: * configfs callback function invoked for * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, - struct config_group *group, - const char *name) +static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); - struct se_node_acl *se_nacl, *se_nacl_new; - struct srpt_node_acl *nacl; - int ret = 0; - u32 nexus_depth = 1; + struct srpt_port *sport = + container_of(se_nacl->se_tpg, struct srpt_port, port_tpg_1); + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); u8 i_port_id[16]; if (srpt_parse_i_port_id(i_port_id, name) < 0) { pr_err("invalid initiator port ID %s\n", name); - ret = -EINVAL; - goto err; + return -EINVAL; } - se_nacl_new = srpt_alloc_fabric_acl(tpg); - if (!se_nacl_new) { - ret = -ENOMEM; - goto err; - } - /* - * nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a node ACL from demo mode to explict - */ - se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name, - nexus_depth); - if (IS_ERR(se_nacl)) { - ret = PTR_ERR(se_nacl); - goto err; - } - /* Locate our struct srpt_node_acl and set sdev and i_port_id. */ - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); memcpy(&nacl->i_port_id[0], &i_port_id[0], 16); nacl->sport = sport; @@ -3642,29 +3536,22 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, list_add_tail(&nacl->list, &sport->port_acl_list); spin_unlock_irq(&sport->port_acl_lock); - return se_nacl; -err: - return ERR_PTR(ret); + return 0; } /* * configfs callback function invoked for * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id */ -static void srpt_drop_nodeacl(struct se_node_acl *se_nacl) +static void srpt_cleanup_nodeacl(struct se_node_acl *se_nacl) { - struct srpt_node_acl *nacl; - struct srpt_device *sdev; - struct srpt_port *sport; + struct srpt_node_acl *nacl = + container_of(se_nacl, struct srpt_node_acl, nacl); + struct srpt_port *sport = nacl->sport; - nacl = container_of(se_nacl, struct srpt_node_acl, nacl); - sport = nacl->sport; - sdev = sport->sdev; spin_lock_irq(&sport->port_acl_lock); list_del(&nacl->list); spin_unlock_irq(&sport->port_acl_lock); - core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1); - srpt_release_fabric_acl(NULL, se_nacl); } static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size( @@ -3849,8 +3736,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, int res; /* Initialize sport->port_wwn and sport->port_tpg_1 */ - res = core_tpg_register(&srpt_template, &sport->port_wwn, - &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL); + res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP); if (res) return ERR_PTR(res); @@ -3920,20 +3806,14 @@ static struct configfs_attribute *srpt_wwn_attrs[] = { static const struct target_core_fabric_ops srpt_template = { .module = THIS_MODULE, .name = "srpt", + .node_acl_size = sizeof(struct srpt_node_acl), .get_fabric_name = srpt_get_fabric_name, - .get_fabric_proto_ident = srpt_get_fabric_proto_ident, .tpg_get_wwn = srpt_get_fabric_wwn, .tpg_get_tag = srpt_get_tag, - .tpg_get_default_depth = srpt_get_default_depth, - .tpg_get_pr_transport_id = srpt_get_pr_transport_id, - .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, .tpg_check_demo_mode = srpt_check_false, .tpg_check_demo_mode_cache = srpt_check_true, .tpg_check_demo_mode_write_protect = srpt_check_true, .tpg_check_prod_mode_write_protect = srpt_check_false, - .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl, - .tpg_release_fabric_acl = srpt_release_fabric_acl, .tpg_get_inst_index = srpt_tpg_get_inst_index, .release_cmd = srpt_release_cmd, .check_stop_free = srpt_check_stop_free, @@ -3944,7 +3824,6 @@ static const struct target_core_fabric_ops srpt_template = { .write_pending = srpt_write_pending, .write_pending_status = srpt_write_pending_status, .set_default_node_attributes = srpt_set_default_node_attrs, - .get_task_tag = srpt_get_task_tag, .get_cmd_state = srpt_get_tcm_cmd_state, .queue_data_in = srpt_queue_data_in, .queue_status = srpt_queue_status, @@ -3958,12 +3837,8 @@ static const struct target_core_fabric_ops srpt_template = { .fabric_drop_wwn = srpt_drop_tport, .fabric_make_tpg = srpt_make_tpg, .fabric_drop_tpg = srpt_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = srpt_make_nodeacl, - .fabric_drop_nodeacl = srpt_drop_nodeacl, + .fabric_init_nodeacl = srpt_init_nodeacl, + .fabric_cleanup_nodeacl = srpt_cleanup_nodeacl, .tfc_wwn_attrs = srpt_wwn_attrs, .tfc_tpg_base_attrs = srpt_tpg_attrs, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index d85c0c205625..21f8df67522a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -238,7 +238,6 @@ struct srpt_send_ioctx { bool rdma_aborted; struct se_cmd cmd; struct completion tx_done; - u64 tag; int sg_cnt; int mapped_sg_count; u16 n_rdma_ius; @@ -410,34 +409,16 @@ struct srpt_device { /** * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). + * @nacl: Target core node ACL information. * @i_port_id: 128-bit SRP initiator port ID. * @sport: port information. - * @nacl: Target core node ACL information. * @list: Element of the per-HCA ACL list. */ struct srpt_node_acl { + struct se_node_acl nacl; u8 i_port_id[16]; struct srpt_port *sport; - struct se_node_acl nacl; struct list_head list; }; -/* - * SRP-releated SCSI persistent reservation definitions. - * - * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction). - * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using - * SCSI over an RDMA interface). - */ - -enum { - SCSI_TRANSPORTID_PROTOCOLID_SRP = 4, -}; - -struct spc_rdma_transport_id { - uint8_t protocol_identifier; - uint8_t reserved[7]; - uint8_t i_port_id[16]; -}; - #endif /* IB_SRPT_H */ diff --git a/drivers/input/input.c b/drivers/input/input.c index f31578423636..78d24990a816 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -677,12 +677,9 @@ static void input_dev_release_keys(struct input_dev *dev) int code; if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { - for (code = 0; code <= KEY_MAX; code++) { - if (is_event_supported(code, dev->keybit, KEY_MAX) && - __test_and_clear_bit(code, dev->key)) { - input_pass_event(dev, EV_KEY, code, 0); - } - } + for_each_set_bit(code, dev->key, KEY_CNT) + input_pass_event(dev, EV_KEY, code, 0); + memset(dev->key, 0, sizeof(dev->key)); input_pass_event(dev, EV_SYN, SYN_REPORT, 1); } } @@ -1626,10 +1623,7 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env) if (!test_bit(EV_##type, dev->evbit)) \ break; \ \ - for (i = 0; i < type##_MAX; i++) { \ - if (!test_bit(i, dev->bits##bit)) \ - continue; \ - \ + for_each_set_bit(i, dev->bits##bit, type##_CNT) { \ active = test_bit(i, dev->bits); \ if (!active && !on) \ continue; \ @@ -1980,22 +1974,12 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) events = mt_slots + 1; /* count SYN_MT_REPORT and SYN_REPORT */ - if (test_bit(EV_ABS, dev->evbit)) { - for (i = 0; i < ABS_CNT; i++) { - if (test_bit(i, dev->absbit)) { - if (input_is_mt_axis(i)) - events += mt_slots; - else - events++; - } - } - } + if (test_bit(EV_ABS, dev->evbit)) + for_each_set_bit(i, dev->absbit, ABS_CNT) + events += input_is_mt_axis(i) ? mt_slots : 1; - if (test_bit(EV_REL, dev->evbit)) { - for (i = 0; i < REL_CNT; i++) - if (test_bit(i, dev->relbit)) - events++; - } + if (test_bit(EV_REL, dev->evbit)) + events += bitmap_weight(dev->relbit, REL_CNT); /* Make room for KEY and MSC events */ events += 7; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 61c761156371..f8850f9cb331 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -344,6 +344,7 @@ struct usb_xpad { int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ + unsigned long led_no; /* led to lit on xbox360 controllers */ }; /* @@ -488,6 +489,8 @@ static void xpad360_process_packet(struct usb_xpad *xpad, input_sync(dev); } +static void xpad_identify_controller(struct usb_xpad *xpad); + /* * xpad360w_process_packet * @@ -510,6 +513,11 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha if (data[1] & 0x80) { xpad->pad_present = 1; usb_submit_urb(xpad->bulk_out, GFP_ATOMIC); + /* + * Light up the segment corresponding to + * controller number. + */ + xpad_identify_controller(xpad); } else xpad->pad_present = 0; } @@ -881,17 +889,63 @@ struct xpad_led { struct usb_xpad *xpad; }; +/** + * @param command + * 0: off + * 1: all blink, then previous setting + * 2: 1/top-left blink, then on + * 3: 2/top-right blink, then on + * 4: 3/bottom-left blink, then on + * 5: 4/bottom-right blink, then on + * 6: 1/top-left on + * 7: 2/top-right on + * 8: 3/bottom-left on + * 9: 4/bottom-right on + * 10: rotate + * 11: blink, based on previous setting + * 12: slow blink, based on previous setting + * 13: rotate with two lights + * 14: persistent slow all blink + * 15: blink once, then previous setting + */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { - if (command >= 0 && command < 14) { - mutex_lock(&xpad->odata_mutex); + command %= 16; + + mutex_lock(&xpad->odata_mutex); + + switch (xpad->xtype) { + case XTYPE_XBOX360: xpad->odata[0] = 0x01; xpad->odata[1] = 0x03; xpad->odata[2] = command; xpad->irq_out->transfer_buffer_length = 3; - usb_submit_urb(xpad->irq_out, GFP_KERNEL); - mutex_unlock(&xpad->odata_mutex); + break; + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x00; + xpad->odata[2] = 0x08; + xpad->odata[3] = 0x40 + command; + xpad->odata[4] = 0x00; + xpad->odata[5] = 0x00; + xpad->odata[6] = 0x00; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + break; } + + usb_submit_urb(xpad->irq_out, GFP_KERNEL); + mutex_unlock(&xpad->odata_mutex); +} + +static void xpad_identify_controller(struct usb_xpad *xpad) +{ + /* Light up the segment corresponding to controller number */ + xpad_send_led_command(xpad, (xpad->led_no % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, @@ -905,22 +959,21 @@ static void xpad_led_set(struct led_classdev *led_cdev, static int xpad_led_probe(struct usb_xpad *xpad) { - static atomic_t led_seq = ATOMIC_INIT(-1); - unsigned long led_no; + static atomic_t led_seq = ATOMIC_INIT(-1); struct xpad_led *led; struct led_classdev *led_cdev; int error; - if (xpad->xtype != XTYPE_XBOX360) + if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(struct xpad_led), GFP_KERNEL); if (!led) return -ENOMEM; - led_no = atomic_inc_return(&led_seq); + xpad->led_no = atomic_inc_return(&led_seq); - snprintf(led->name, sizeof(led->name), "xpad%lu", led_no); + snprintf(led->name, sizeof(led->name), "xpad%lu", xpad->led_no); led->xpad = xpad; led_cdev = &led->led_cdev; @@ -934,10 +987,8 @@ static int xpad_led_probe(struct usb_xpad *xpad) return error; } - /* - * Light up the segment corresponding to controller number - */ - xpad_send_led_command(xpad, (led_no % 4) + 2); + /* Light up the segment corresponding to controller number */ + xpad_identify_controller(xpad); return 0; } @@ -954,6 +1005,7 @@ static void xpad_led_disconnect(struct usb_xpad *xpad) #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } +static void xpad_identify_controller(struct usb_xpad *xpad) { } #endif diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c index 2e855e6f3565..d2ea863d6a45 100644 --- a/drivers/input/keyboard/imx_keypad.c +++ b/drivers/input/keyboard/imx_keypad.c @@ -506,7 +506,9 @@ static int imx_keypad_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad); /* Ensure that the keypad will stay dormant until opened */ - clk_prepare_enable(keypad->clk); + error = clk_prepare_enable(keypad->clk); + if (error) + return error; imx_keypad_inhibit(keypad); clk_disable_unprepare(keypad->clk); diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index f1c844739cd7..10e140af5aac 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -167,9 +167,13 @@ static irqreturn_t axp20x_pek_irq(int irq, void *pwr) struct input_dev *idev = pwr; struct axp20x_pek *axp20x_pek = input_get_drvdata(idev); - if (irq == axp20x_pek->irq_dbr) + /* + * The power-button is connected to ground so a falling edge (dbf) + * means it is pressed. + */ + if (irq == axp20x_pek->irq_dbf) input_report_key(idev, KEY_POWER, true); - else if (irq == axp20x_pek->irq_dbf) + else if (irq == axp20x_pek->irq_dbr) input_report_key(idev, KEY_POWER, false); input_sync(idev); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 62641f2adaf7..5b5f403d8ce6 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -771,7 +771,7 @@ static const struct attribute_group *elan_sysfs_groups[] = { */ static void elan_report_contact(struct elan_tp_data *data, int contact_num, bool contact_valid, - bool hover_event, u8 *finger_data) + u8 *finger_data) { struct input_dev *input = data->input; unsigned int pos_x, pos_y; @@ -815,9 +815,7 @@ static void elan_report_contact(struct elan_tp_data *data, input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_POSITION_X, pos_x); input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); - input_report_abs(input, ABS_MT_DISTANCE, hover_event); - input_report_abs(input, ABS_MT_PRESSURE, - hover_event ? 0 : scaled_pressure); + input_report_abs(input, ABS_MT_PRESSURE, scaled_pressure); input_report_abs(input, ABS_TOOL_WIDTH, mk_x); input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); @@ -839,14 +837,14 @@ static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) hover_event = hover_info & 0x40; for (i = 0; i < ETP_MAX_FINGERS; i++) { contact_valid = tp_info & (1U << (3 + i)); - elan_report_contact(data, i, contact_valid, hover_event, - finger_data); + elan_report_contact(data, i, contact_valid, finger_data); if (contact_valid) finger_data += ETP_FINGER_DATA_LEN; } input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_report_abs(input, ABS_DISTANCE, hover_event != 0); input_mt_report_pointer_emulation(input, true); input_sync(input); } @@ -922,6 +920,7 @@ static int elan_setup_input_device(struct elan_tp_data *data) input_abs_set_res(input, ABS_Y, data->y_res); input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + input_set_abs_params(input, ABS_DISTANCE, 0, 1, 0, 0); /* And MT parameters */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); @@ -934,7 +933,6 @@ static int elan_setup_input_device(struct elan_tp_data *data) ETP_FINGER_WIDTH * max_width, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, ETP_FINGER_WIDTH * min_width, 0, 0); - input_set_abs_params(input, ABS_MT_DISTANCE, 0, 1, 0, 0); data->input = input; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabee..3a32caf06bf1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 77833d7a004b..200841b77edb 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -244,6 +244,7 @@ config SERIO_PS2MULT config SERIO_ARC_PS2 tristate "ARC PS/2 support" + depends on HAS_IOMEM help Say Y here if you have an ARC FPGA platform with a PS/2 controller in it. diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index d20fe1dff403..a854c6e5f09e 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -658,6 +658,18 @@ config TOUCHSCREEN_PIXCIR To compile this driver as a module, choose M here: the module will be called pixcir_i2c_ts. +config TOUCHSCREEN_WDT87XX_I2C + tristate "Weida HiTech I2C touchscreen" + depends on I2C + help + Say Y here if you have a Weida WDT87XX I2C touchscreen + connected to your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called wdt87xx_i2c. + config TOUCHSCREEN_WM831X tristate "Support for WM831x touchscreen controllers" depends on MFD_WM831X diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 44deea743d02..fa3d33bac7fc 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_TOUCHSCREEN_TSC2007) += tsc2007.o obj-$(CONFIG_TOUCHSCREEN_UCB1400) += ucb1400_ts.o obj-$(CONFIG_TOUCHSCREEN_WACOM_W8001) += wacom_w8001.o obj-$(CONFIG_TOUCHSCREEN_WACOM_I2C) += wacom_i2c.o +obj-$(CONFIG_TOUCHSCREEN_WDT87XX_I2C) += wdt87xx_i2c.o obj-$(CONFIG_TOUCHSCREEN_WM831X) += wm831x-ts.o obj-$(CONFIG_TOUCHSCREEN_WM97XX) += wm97xx-ts.o wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9705) += wm9705.o diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index e6aef3e48bd9..394b1de9a2a3 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1035,20 +1035,15 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, input->id.bustype = BUS_I2C; input->dev.parent = &client->dev; - __set_bit(EV_KEY, input->evbit); - __set_bit(EV_ABS, input->evbit); - __set_bit(BTN_TOUCH, input->keybit); - input_set_abs_params(input, ABS_X, 0, tsdata->num_x * 64 - 1, 0, 0); - input_set_abs_params(input, ABS_Y, 0, tsdata->num_y * 64 - 1, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_X, 0, tsdata->num_x * 64 - 1, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, tsdata->num_y * 64 - 1, 0, 0); if (!pdata) - touchscreen_parse_of_params(input); + touchscreen_parse_of_params(input, true); - error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, 0); + error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, INPUT_MT_DIRECT); if (error) { dev_err(&client->dev, "Unable to init MT slots.\n"); return error; diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index b82b5207c78b..806cd0ad160f 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -14,14 +14,22 @@ #include <linux/input/mt.h> #include <linux/input/touchscreen.h> -static u32 of_get_optional_u32(struct device_node *np, - const char *property) +static bool touchscreen_get_prop_u32(struct device_node *np, + const char *property, + unsigned int default_value, + unsigned int *value) { - u32 val = 0; + u32 val; + int error; - of_property_read_u32(np, property, &val); + error = of_property_read_u32(np, property, &val); + if (error) { + *value = default_value; + return false; + } - return val; + *value = val; + return true; } static void touchscreen_set_params(struct input_dev *dev, @@ -54,34 +62,45 @@ static void touchscreen_set_params(struct input_dev *dev, * input device accordingly. The function keeps previously setuped default * values if no value is specified via DT. */ -void touchscreen_parse_of_params(struct input_dev *dev) +void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch) { struct device_node *np = dev->dev.parent->of_node; - u32 maximum, fuzz; + unsigned int axis; + unsigned int maximum, fuzz; + bool data_present; input_alloc_absinfo(dev); if (!dev->absinfo) return; - maximum = of_get_optional_u32(np, "touchscreen-size-x"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-x"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_X, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_POSITION_X, maximum, fuzz); - } + axis = multitouch ? ABS_MT_POSITION_X : ABS_X; + data_present = touchscreen_get_prop_u32(np, "touchscreen-size-x", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-x", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); - maximum = of_get_optional_u32(np, "touchscreen-size-y"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-y"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_Y, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_POSITION_Y, maximum, fuzz); - } + axis = multitouch ? ABS_MT_POSITION_Y : ABS_Y; + data_present = touchscreen_get_prop_u32(np, "touchscreen-size-y", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-y", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); - maximum = of_get_optional_u32(np, "touchscreen-max-pressure"); - fuzz = of_get_optional_u32(np, "touchscreen-fuzz-pressure"); - if (maximum || fuzz) { - touchscreen_set_params(dev, ABS_PRESSURE, maximum, fuzz); - touchscreen_set_params(dev, ABS_MT_PRESSURE, maximum, fuzz); - } + axis = multitouch ? ABS_MT_PRESSURE : ABS_PRESSURE; + data_present = touchscreen_get_prop_u32(np, "touchscreen-max-pressure", + input_abs_get_max(dev, axis), + &maximum) | + touchscreen_get_prop_u32(np, "touchscreen-fuzz-pressure", + input_abs_get_fuzz(dev, axis), + &fuzz); + if (data_present) + touchscreen_set_params(dev, axis, maximum, fuzz); } EXPORT_SYMBOL(touchscreen_parse_of_params); diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index 72657c579430..d8c025b0f88c 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -709,7 +709,7 @@ static int tsc2005_probe(struct spi_device *spi) input_set_abs_params(input_dev, ABS_PRESSURE, 0, max_p, fudge_p, 0); if (np) - touchscreen_parse_of_params(input_dev); + touchscreen_parse_of_params(input_dev, false); input_dev->open = tsc2005_open; input_dev->close = tsc2005_close; diff --git a/drivers/input/touchscreen/wdt87xx_i2c.c b/drivers/input/touchscreen/wdt87xx_i2c.c new file mode 100644 index 000000000000..fb92ae1c5fae --- /dev/null +++ b/drivers/input/touchscreen/wdt87xx_i2c.c @@ -0,0 +1,1149 @@ +/* + * Weida HiTech WDT87xx TouchScreen I2C driver + * + * Copyright (c) 2015 Weida Hi-Tech Co., Ltd. + * HN Chen <hn.chen@weidahitech.com> + * + * This software is licensed under the terms of the GNU General Public + * License, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + */ + +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/firmware.h> +#include <linux/input/mt.h> +#include <linux/acpi.h> +#include <asm/unaligned.h> + +#define WDT87XX_NAME "wdt87xx_i2c" +#define WDT87XX_DRV_VER "0.9.6" +#define WDT87XX_FW_NAME "wdt87xx_fw.bin" +#define WDT87XX_CFG_NAME "wdt87xx_cfg.bin" + +#define MODE_ACTIVE 0x01 +#define MODE_READY 0x02 +#define MODE_IDLE 0x03 +#define MODE_SLEEP 0x04 +#define MODE_STOP 0xFF + +#define WDT_MAX_FINGER 10 +#define WDT_RAW_BUF_COUNT 54 +#define WDT_V1_RAW_BUF_COUNT 74 +#define WDT_FIRMWARE_ID 0xa9e368f5 + +#define PG_SIZE 0x1000 +#define MAX_RETRIES 3 + +#define MAX_UNIT_AXIS 0x7FFF + +#define PKT_READ_SIZE 72 +#define PKT_WRITE_SIZE 80 + +/* the finger definition of the report event */ +#define FINGER_EV_OFFSET_ID 0 +#define FINGER_EV_OFFSET_X 1 +#define FINGER_EV_OFFSET_Y 3 +#define FINGER_EV_SIZE 5 + +#define FINGER_EV_V1_OFFSET_ID 0 +#define FINGER_EV_V1_OFFSET_W 1 +#define FINGER_EV_V1_OFFSET_P 2 +#define FINGER_EV_V1_OFFSET_X 3 +#define FINGER_EV_V1_OFFSET_Y 5 +#define FINGER_EV_V1_SIZE 7 + +/* The definition of a report packet */ +#define TOUCH_PK_OFFSET_REPORT_ID 0 +#define TOUCH_PK_OFFSET_EVENT 1 +#define TOUCH_PK_OFFSET_SCAN_TIME 51 +#define TOUCH_PK_OFFSET_FNGR_NUM 53 + +#define TOUCH_PK_V1_OFFSET_REPORT_ID 0 +#define TOUCH_PK_V1_OFFSET_EVENT 1 +#define TOUCH_PK_V1_OFFSET_SCAN_TIME 71 +#define TOUCH_PK_V1_OFFSET_FNGR_NUM 73 + +/* The definition of the controller parameters */ +#define CTL_PARAM_OFFSET_FW_ID 0 +#define CTL_PARAM_OFFSET_PLAT_ID 2 +#define CTL_PARAM_OFFSET_XMLS_ID1 4 +#define CTL_PARAM_OFFSET_XMLS_ID2 6 +#define CTL_PARAM_OFFSET_PHY_CH_X 8 +#define CTL_PARAM_OFFSET_PHY_CH_Y 10 +#define CTL_PARAM_OFFSET_PHY_X0 12 +#define CTL_PARAM_OFFSET_PHY_X1 14 +#define CTL_PARAM_OFFSET_PHY_Y0 16 +#define CTL_PARAM_OFFSET_PHY_Y1 18 +#define CTL_PARAM_OFFSET_PHY_W 22 +#define CTL_PARAM_OFFSET_PHY_H 24 +#define CTL_PARAM_OFFSET_FACTOR 32 + +/* Communication commands */ +#define PACKET_SIZE 56 +#define VND_REQ_READ 0x06 +#define VND_READ_DATA 0x07 +#define VND_REQ_WRITE 0x08 + +#define VND_CMD_START 0x00 +#define VND_CMD_STOP 0x01 +#define VND_CMD_RESET 0x09 + +#define VND_CMD_ERASE 0x1A + +#define VND_GET_CHECKSUM 0x66 + +#define VND_SET_DATA 0x83 +#define VND_SET_COMMAND_DATA 0x84 +#define VND_SET_CHECKSUM_CALC 0x86 +#define VND_SET_CHECKSUM_LENGTH 0x87 + +#define VND_CMD_SFLCK 0xFC +#define VND_CMD_SFUNL 0xFD + +#define CMD_SFLCK_KEY 0xC39B +#define CMD_SFUNL_KEY 0x95DA + +#define STRIDX_PLATFORM_ID 0x80 +#define STRIDX_PARAMETERS 0x81 + +#define CMD_BUF_SIZE 8 +#define PKT_BUF_SIZE 64 + +/* The definition of the command packet */ +#define CMD_REPORT_ID_OFFSET 0x0 +#define CMD_TYPE_OFFSET 0x1 +#define CMD_INDEX_OFFSET 0x2 +#define CMD_KEY_OFFSET 0x3 +#define CMD_LENGTH_OFFSET 0x4 +#define CMD_DATA_OFFSET 0x8 + +/* The definition of firmware chunk tags */ +#define FOURCC_ID_RIFF 0x46464952 +#define FOURCC_ID_WHIF 0x46494857 +#define FOURCC_ID_FRMT 0x544D5246 +#define FOURCC_ID_FRWR 0x52575246 +#define FOURCC_ID_CNFG 0x47464E43 + +#define CHUNK_ID_FRMT FOURCC_ID_FRMT +#define CHUNK_ID_FRWR FOURCC_ID_FRWR +#define CHUNK_ID_CNFG FOURCC_ID_CNFG + +#define FW_FOURCC1_OFFSET 0 +#define FW_SIZE_OFFSET 4 +#define FW_FOURCC2_OFFSET 8 +#define FW_PAYLOAD_OFFSET 40 + +#define FW_CHUNK_ID_OFFSET 0 +#define FW_CHUNK_SIZE_OFFSET 4 +#define FW_CHUNK_TGT_START_OFFSET 8 +#define FW_CHUNK_PAYLOAD_LEN_OFFSET 12 +#define FW_CHUNK_SRC_START_OFFSET 16 +#define FW_CHUNK_VERSION_OFFSET 20 +#define FW_CHUNK_ATTR_OFFSET 24 +#define FW_CHUNK_PAYLOAD_OFFSET 32 + +/* Controller requires minimum 300us between commands */ +#define WDT_COMMAND_DELAY_MS 2 +#define WDT_FLASH_WRITE_DELAY_MS 4 + +struct wdt87xx_sys_param { + u16 fw_id; + u16 plat_id; + u16 xmls_id1; + u16 xmls_id2; + u16 phy_ch_x; + u16 phy_ch_y; + u16 phy_w; + u16 phy_h; + u16 scaling_factor; + u32 max_x; + u32 max_y; +}; + +struct wdt87xx_data { + struct i2c_client *client; + struct input_dev *input; + /* Mutex for fw update to prevent concurrent access */ + struct mutex fw_mutex; + struct wdt87xx_sys_param param; + u8 phys[32]; +}; + +static int wdt87xx_i2c_xfer(struct i2c_client *client, + void *txdata, size_t txlen, + void *rxdata, size_t rxlen) +{ + struct i2c_msg msgs[] = { + { + .addr = client->addr, + .flags = 0, + .len = txlen, + .buf = txdata, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = rxlen, + .buf = rxdata, + }, + }; + int error; + int ret; + + ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); + if (ret != ARRAY_SIZE(msgs)) { + error = ret < 0 ? ret : -EIO; + dev_err(&client->dev, "%s: i2c transfer failed: %d\n", + __func__, error); + return error; + } + + return 0; +} + +static int wdt87xx_get_string(struct i2c_client *client, u8 str_idx, + u8 *buf, size_t len) +{ + u8 tx_buf[] = { 0x22, 0x00, 0x13, 0x0E, str_idx, 0x23, 0x00 }; + u8 rx_buf[PKT_WRITE_SIZE]; + size_t rx_len = len + 2; + int error; + + if (rx_len > sizeof(rx_buf)) + return -EINVAL; + + error = wdt87xx_i2c_xfer(client, tx_buf, sizeof(tx_buf), + rx_buf, rx_len); + if (error) { + dev_err(&client->dev, "get string failed: %d\n", error); + return error; + } + + if (rx_buf[1] != 0x03) { + dev_err(&client->dev, "unexpected response to get string: %d\n", + rx_buf[1]); + return -EINVAL; + } + + rx_len = min_t(size_t, len, rx_buf[0]); + memcpy(buf, &rx_buf[2], rx_len); + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_get_feature(struct i2c_client *client, + u8 *buf, size_t buf_size) +{ + u8 tx_buf[8]; + u8 rx_buf[PKT_WRITE_SIZE]; + size_t tx_len = 0; + size_t rx_len = buf_size + 2; + int error; + + if (rx_len > sizeof(rx_buf)) + return -EINVAL; + + /* Get feature command packet */ + tx_buf[tx_len++] = 0x22; + tx_buf[tx_len++] = 0x00; + if (buf[CMD_REPORT_ID_OFFSET] > 0xF) { + tx_buf[tx_len++] = 0x30; + tx_buf[tx_len++] = 0x02; + tx_buf[tx_len++] = buf[CMD_REPORT_ID_OFFSET]; + } else { + tx_buf[tx_len++] = 0x30 | buf[CMD_REPORT_ID_OFFSET]; + tx_buf[tx_len++] = 0x02; + } + tx_buf[tx_len++] = 0x23; + tx_buf[tx_len++] = 0x00; + + error = wdt87xx_i2c_xfer(client, tx_buf, tx_len, rx_buf, rx_len); + if (error) { + dev_err(&client->dev, "get feature failed: %d\n", error); + return error; + } + + rx_len = min_t(size_t, buf_size, get_unaligned_le16(rx_buf)); + memcpy(buf, &rx_buf[2], rx_len); + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_set_feature(struct i2c_client *client, + const u8 *buf, size_t buf_size) +{ + u8 tx_buf[PKT_WRITE_SIZE]; + int tx_len = 0; + int error; + + /* Set feature command packet */ + tx_buf[tx_len++] = 0x22; + tx_buf[tx_len++] = 0x00; + if (buf[CMD_REPORT_ID_OFFSET] > 0xF) { + tx_buf[tx_len++] = 0x30; + tx_buf[tx_len++] = 0x03; + tx_buf[tx_len++] = buf[CMD_REPORT_ID_OFFSET]; + } else { + tx_buf[tx_len++] = 0x30 | buf[CMD_REPORT_ID_OFFSET]; + tx_buf[tx_len++] = 0x03; + } + tx_buf[tx_len++] = 0x23; + tx_buf[tx_len++] = 0x00; + tx_buf[tx_len++] = (buf_size & 0xFF); + tx_buf[tx_len++] = ((buf_size & 0xFF00) >> 8); + + if (tx_len + buf_size > sizeof(tx_buf)) + return -EINVAL; + + memcpy(&tx_buf[tx_len], buf, buf_size); + tx_len += buf_size; + + error = i2c_master_send(client, tx_buf, tx_len); + if (error < 0) { + dev_err(&client->dev, "set feature failed: %d\n", error); + return error; + } + + mdelay(WDT_COMMAND_DELAY_MS); + + return 0; +} + +static int wdt87xx_send_command(struct i2c_client *client, int cmd, int value) +{ + u8 cmd_buf[CMD_BUF_SIZE]; + + /* Set the command packet */ + cmd_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_WRITE; + cmd_buf[CMD_TYPE_OFFSET] = VND_SET_COMMAND_DATA; + put_unaligned_le16((u16)cmd, &cmd_buf[CMD_INDEX_OFFSET]); + + switch (cmd) { + case VND_CMD_START: + case VND_CMD_STOP: + case VND_CMD_RESET: + /* Mode selector */ + put_unaligned_le32((value & 0xFF), &cmd_buf[CMD_LENGTH_OFFSET]); + break; + + case VND_CMD_SFLCK: + put_unaligned_le16(CMD_SFLCK_KEY, &cmd_buf[CMD_KEY_OFFSET]); + break; + + case VND_CMD_SFUNL: + put_unaligned_le16(CMD_SFUNL_KEY, &cmd_buf[CMD_KEY_OFFSET]); + break; + + case VND_CMD_ERASE: + case VND_SET_CHECKSUM_CALC: + case VND_SET_CHECKSUM_LENGTH: + put_unaligned_le32(value, &cmd_buf[CMD_KEY_OFFSET]); + break; + + default: + cmd_buf[CMD_REPORT_ID_OFFSET] = 0; + dev_err(&client->dev, "Invalid command: %d\n", cmd); + return -EINVAL; + } + + return wdt87xx_set_feature(client, cmd_buf, sizeof(cmd_buf)); +} + +static int wdt87xx_sw_reset(struct i2c_client *client) +{ + int error; + + dev_dbg(&client->dev, "resetting device now\n"); + + error = wdt87xx_send_command(client, VND_CMD_RESET, 0); + if (error) { + dev_err(&client->dev, "reset failed\n"); + return error; + } + + /* Wait the device to be ready */ + msleep(200); + + return 0; +} + +static const void *wdt87xx_get_fw_chunk(const struct firmware *fw, u32 id) +{ + size_t pos = FW_PAYLOAD_OFFSET; + u32 chunk_id, chunk_size; + + while (pos < fw->size) { + chunk_id = get_unaligned_le32(fw->data + + pos + FW_CHUNK_ID_OFFSET); + if (chunk_id == id) + return fw->data + pos; + + chunk_size = get_unaligned_le32(fw->data + + pos + FW_CHUNK_SIZE_OFFSET); + pos += chunk_size + 2 * sizeof(u32); /* chunk ID + size */ + } + + return NULL; +} + +static int wdt87xx_get_sysparam(struct i2c_client *client, + struct wdt87xx_sys_param *param) +{ + u8 buf[PKT_READ_SIZE]; + int error; + + error = wdt87xx_get_string(client, STRIDX_PARAMETERS, buf, 34); + if (error) { + dev_err(&client->dev, "failed to get parameters\n"); + return error; + } + + param->xmls_id1 = get_unaligned_le16(buf + CTL_PARAM_OFFSET_XMLS_ID1); + param->xmls_id2 = get_unaligned_le16(buf + CTL_PARAM_OFFSET_XMLS_ID2); + param->phy_ch_x = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_CH_X); + param->phy_ch_y = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_CH_Y); + param->phy_w = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_W) / 10; + param->phy_h = get_unaligned_le16(buf + CTL_PARAM_OFFSET_PHY_H) / 10; + + /* Get the scaling factor of pixel to logical coordinate */ + param->scaling_factor = + get_unaligned_le16(buf + CTL_PARAM_OFFSET_FACTOR); + + param->max_x = MAX_UNIT_AXIS; + param->max_y = DIV_ROUND_CLOSEST(MAX_UNIT_AXIS * param->phy_h, + param->phy_w); + + error = wdt87xx_get_string(client, STRIDX_PLATFORM_ID, buf, 8); + if (error) { + dev_err(&client->dev, "failed to get platform id\n"); + return error; + } + + param->plat_id = buf[1]; + + buf[0] = 0xf2; + error = wdt87xx_get_feature(client, buf, 16); + if (error) { + dev_err(&client->dev, "failed to get firmware id\n"); + return error; + } + + if (buf[0] != 0xf2) { + dev_err(&client->dev, "wrong id of fw response: 0x%x\n", + buf[0]); + return -EINVAL; + } + + param->fw_id = get_unaligned_le16(&buf[1]); + + dev_info(&client->dev, + "fw_id: 0x%x, plat_id: 0x%x, xml_id1: %04x, xml_id2: %04x\n", + param->fw_id, param->plat_id, + param->xmls_id1, param->xmls_id2); + + return 0; +} + +static int wdt87xx_validate_firmware(struct wdt87xx_data *wdt, + const struct firmware *fw) +{ + const void *fw_chunk; + u32 data1, data2; + u32 size; + u8 fw_chip_id; + u8 chip_id; + + data1 = get_unaligned_le32(fw->data + FW_FOURCC1_OFFSET); + data2 = get_unaligned_le32(fw->data + FW_FOURCC2_OFFSET); + if (data1 != FOURCC_ID_RIFF || data2 != FOURCC_ID_WHIF) { + dev_err(&wdt->client->dev, "check fw tag failed\n"); + return -EINVAL; + } + + size = get_unaligned_le32(fw->data + FW_SIZE_OFFSET); + if (size != fw->size) { + dev_err(&wdt->client->dev, + "fw size mismatch: expected %d, actual %zu\n", + size, fw->size); + return -EINVAL; + } + + /* + * Get the chip_id from the firmware. Make sure that it is the + * right controller to do the firmware and config update. + */ + fw_chunk = wdt87xx_get_fw_chunk(fw, CHUNK_ID_FRWR); + if (!fw_chunk) { + dev_err(&wdt->client->dev, + "unable to locate firmware chunk\n"); + return -EINVAL; + } + + fw_chip_id = (get_unaligned_le32(fw_chunk + + FW_CHUNK_VERSION_OFFSET) >> 12) & 0xF; + chip_id = (wdt->param.fw_id >> 12) & 0xF; + + if (fw_chip_id != chip_id) { + dev_err(&wdt->client->dev, + "fw version mismatch: fw %d vs. chip %d\n", + fw_chip_id, chip_id); + return -ENODEV; + } + + return 0; +} + +static int wdt87xx_validate_fw_chunk(const void *data, int id) +{ + if (id == CHUNK_ID_FRWR) { + u32 fw_id; + + fw_id = get_unaligned_le32(data + FW_CHUNK_PAYLOAD_OFFSET); + if (fw_id != WDT_FIRMWARE_ID) + return -EINVAL; + } + + return 0; +} + +static int wdt87xx_write_data(struct i2c_client *client, const char *data, + u32 address, int length) +{ + u16 packet_size; + int count = 0; + int error; + u8 pkt_buf[PKT_BUF_SIZE]; + + /* Address and length should be 4 bytes aligned */ + if ((address & 0x3) != 0 || (length & 0x3) != 0) { + dev_err(&client->dev, + "addr & len must be 4 bytes aligned %x, %x\n", + address, length); + return -EINVAL; + } + + while (length) { + packet_size = min(length, PACKET_SIZE); + + pkt_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_WRITE; + pkt_buf[CMD_TYPE_OFFSET] = VND_SET_DATA; + put_unaligned_le16(packet_size, &pkt_buf[CMD_INDEX_OFFSET]); + put_unaligned_le32(address, &pkt_buf[CMD_LENGTH_OFFSET]); + memcpy(&pkt_buf[CMD_DATA_OFFSET], data, packet_size); + + error = wdt87xx_set_feature(client, pkt_buf, sizeof(pkt_buf)); + if (error) + return error; + + length -= packet_size; + data += packet_size; + address += packet_size; + + /* Wait for the controller to finish the write */ + mdelay(WDT_FLASH_WRITE_DELAY_MS); + + if ((++count % 32) == 0) { + /* Delay for fw to clear watch dog */ + msleep(20); + } + } + + return 0; +} + +static u16 misr(u16 cur_value, u8 new_value) +{ + u32 a, b; + u32 bit0; + u32 y; + + a = cur_value; + b = new_value; + bit0 = a ^ (b & 1); + bit0 ^= a >> 1; + bit0 ^= a >> 2; + bit0 ^= a >> 4; + bit0 ^= a >> 5; + bit0 ^= a >> 7; + bit0 ^= a >> 11; + bit0 ^= a >> 15; + y = (a << 1) ^ b; + y = (y & ~1) | (bit0 & 1); + + return (u16)y; +} + +static u16 wdt87xx_calculate_checksum(const u8 *data, size_t length) +{ + u16 checksum = 0; + size_t i; + + for (i = 0; i < length; i++) + checksum = misr(checksum, data[i]); + + return checksum; +} + +static int wdt87xx_get_checksum(struct i2c_client *client, u16 *checksum, + u32 address, int length) +{ + int error; + int time_delay; + u8 pkt_buf[PKT_BUF_SIZE]; + u8 cmd_buf[CMD_BUF_SIZE]; + + error = wdt87xx_send_command(client, VND_SET_CHECKSUM_LENGTH, length); + if (error) { + dev_err(&client->dev, "failed to set checksum length\n"); + return error; + } + + error = wdt87xx_send_command(client, VND_SET_CHECKSUM_CALC, address); + if (error) { + dev_err(&client->dev, "failed to set checksum address\n"); + return error; + } + + /* Wait the operation to complete */ + time_delay = DIV_ROUND_UP(length, 1024); + msleep(time_delay * 30); + + memset(cmd_buf, 0, sizeof(cmd_buf)); + cmd_buf[CMD_REPORT_ID_OFFSET] = VND_REQ_READ; + cmd_buf[CMD_TYPE_OFFSET] = VND_GET_CHECKSUM; + error = wdt87xx_set_feature(client, cmd_buf, sizeof(cmd_buf)); + if (error) { + dev_err(&client->dev, "failed to request checksum\n"); + return error; + } + + memset(pkt_buf, 0, sizeof(pkt_buf)); + pkt_buf[CMD_REPORT_ID_OFFSET] = VND_READ_DATA; + error = wdt87xx_get_feature(client, pkt_buf, sizeof(pkt_buf)); + if (error) { + dev_err(&client->dev, "failed to read checksum\n"); + return error; + } + + *checksum = get_unaligned_le16(&pkt_buf[CMD_DATA_OFFSET]); + return 0; +} + +static int wdt87xx_write_firmware(struct i2c_client *client, const void *chunk) +{ + u32 start_addr = get_unaligned_le32(chunk + FW_CHUNK_TGT_START_OFFSET); + u32 size = get_unaligned_le32(chunk + FW_CHUNK_PAYLOAD_LEN_OFFSET); + const void *data = chunk + FW_CHUNK_PAYLOAD_OFFSET; + int error; + int err1; + int page_size; + int retry = 0; + u16 device_checksum, firmware_checksum; + + dev_dbg(&client->dev, "start 4k page program\n"); + + error = wdt87xx_send_command(client, VND_CMD_STOP, MODE_STOP); + if (error) { + dev_err(&client->dev, "stop report mode failed\n"); + return error; + } + + error = wdt87xx_send_command(client, VND_CMD_SFUNL, 0); + if (error) { + dev_err(&client->dev, "unlock failed\n"); + goto out_enable_reporting; + } + + mdelay(10); + + while (size) { + dev_dbg(&client->dev, "%s: %x, %x\n", __func__, + start_addr, size); + + page_size = min_t(u32, size, PG_SIZE); + size -= page_size; + + for (retry = 0; retry < MAX_RETRIES; retry++) { + error = wdt87xx_send_command(client, VND_CMD_ERASE, + start_addr); + if (error) { + dev_err(&client->dev, + "erase failed at %#08x\n", start_addr); + break; + } + + msleep(50); + + error = wdt87xx_write_data(client, data, start_addr, + page_size); + if (error) { + dev_err(&client->dev, + "write failed at %#08x (%d bytes)\n", + start_addr, page_size); + break; + } + + error = wdt87xx_get_checksum(client, &device_checksum, + start_addr, page_size); + if (error) { + dev_err(&client->dev, + "failed to retrieve checksum for %#08x (len: %d)\n", + start_addr, page_size); + break; + } + + firmware_checksum = + wdt87xx_calculate_checksum(data, page_size); + + if (device_checksum == firmware_checksum) + break; + + dev_err(&client->dev, + "checksum fail: %d vs %d, retry %d\n", + device_checksum, firmware_checksum, retry); + } + + if (retry == MAX_RETRIES) { + dev_err(&client->dev, "page write failed\n"); + error = -EIO; + goto out_lock_device; + } + + start_addr = start_addr + page_size; + data = data + page_size; + } + +out_lock_device: + err1 = wdt87xx_send_command(client, VND_CMD_SFLCK, 0); + if (err1) + dev_err(&client->dev, "lock failed\n"); + + mdelay(10); + +out_enable_reporting: + err1 = wdt87xx_send_command(client, VND_CMD_START, 0); + if (err1) + dev_err(&client->dev, "start to report failed\n"); + + return error ? error : err1; +} + +static int wdt87xx_load_chunk(struct i2c_client *client, + const struct firmware *fw, u32 ck_id) +{ + const void *chunk; + int error; + + chunk = wdt87xx_get_fw_chunk(fw, ck_id); + if (!chunk) { + dev_err(&client->dev, "unable to locate chunk (type %d)\n", + ck_id); + return -EINVAL; + } + + error = wdt87xx_validate_fw_chunk(chunk, ck_id); + if (error) { + dev_err(&client->dev, "invalid chunk (type %d): %d\n", + ck_id, error); + return error; + } + + error = wdt87xx_write_firmware(client, chunk); + if (error) { + dev_err(&client->dev, + "failed to write fw chunk (type %d): %d\n", + ck_id, error); + return error; + } + + return 0; +} + +static int wdt87xx_do_update_firmware(struct i2c_client *client, + const struct firmware *fw, + unsigned int chunk_id) +{ + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + int error; + + error = wdt87xx_validate_firmware(wdt, fw); + if (error) + return error; + + error = mutex_lock_interruptible(&wdt->fw_mutex); + if (error) + return error; + + disable_irq(client->irq); + + error = wdt87xx_load_chunk(client, fw, chunk_id); + if (error) { + dev_err(&client->dev, + "firmware load failed (type: %d): %d\n", + chunk_id, error); + goto out; + } + + error = wdt87xx_sw_reset(client); + if (error) { + dev_err(&client->dev, "soft reset failed: %d\n", error); + goto out; + } + + /* Refresh the parameters */ + error = wdt87xx_get_sysparam(client, &wdt->param); + if (error) + dev_err(&client->dev, + "failed to refresh system paramaters: %d\n", error); +out: + enable_irq(client->irq); + mutex_unlock(&wdt->fw_mutex); + + return error ? error : 0; +} + +static int wdt87xx_update_firmware(struct device *dev, + const char *fw_name, unsigned int chunk_id) +{ + struct i2c_client *client = to_i2c_client(dev); + const struct firmware *fw; + int error; + + error = request_firmware(&fw, fw_name, dev); + if (error) { + dev_err(&client->dev, "unable to retrieve firmware %s: %d\n", + fw_name, error); + return error; + } + + error = wdt87xx_do_update_firmware(client, fw, chunk_id); + + release_firmware(fw); + + return error ? error : 0; +} + +static ssize_t config_csum_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + u32 cfg_csum; + + cfg_csum = wdt->param.xmls_id1; + cfg_csum = (cfg_csum << 16) | wdt->param.xmls_id2; + + return scnprintf(buf, PAGE_SIZE, "%x\n", cfg_csum); +} + +static ssize_t fw_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + + return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.fw_id); +} + +static ssize_t plat_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct wdt87xx_data *wdt = i2c_get_clientdata(client); + + return scnprintf(buf, PAGE_SIZE, "%x\n", wdt->param.plat_id); +} + +static ssize_t update_config_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int error; + + error = wdt87xx_update_firmware(dev, WDT87XX_CFG_NAME, CHUNK_ID_CNFG); + + return error ? error : count; +} + +static ssize_t update_fw_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int error; + + error = wdt87xx_update_firmware(dev, WDT87XX_FW_NAME, CHUNK_ID_FRWR); + + return error ? error : count; +} + +static DEVICE_ATTR_RO(config_csum); +static DEVICE_ATTR_RO(fw_version); +static DEVICE_ATTR_RO(plat_id); +static DEVICE_ATTR_WO(update_config); +static DEVICE_ATTR_WO(update_fw); + +static struct attribute *wdt87xx_attrs[] = { + &dev_attr_config_csum.attr, + &dev_attr_fw_version.attr, + &dev_attr_plat_id.attr, + &dev_attr_update_config.attr, + &dev_attr_update_fw.attr, + NULL +}; + +static const struct attribute_group wdt87xx_attr_group = { + .attrs = wdt87xx_attrs, +}; + +static void wdt87xx_report_contact(struct input_dev *input, + struct wdt87xx_sys_param *param, + u8 *buf) +{ + int finger_id; + u32 x, y, w; + u8 p; + + finger_id = (buf[FINGER_EV_V1_OFFSET_ID] >> 3) - 1; + if (finger_id < 0) + return; + + /* Check if this is an active contact */ + if (!(buf[FINGER_EV_V1_OFFSET_ID] & 0x1)) + return; + + w = buf[FINGER_EV_V1_OFFSET_W]; + w *= param->scaling_factor; + + p = buf[FINGER_EV_V1_OFFSET_P]; + + x = get_unaligned_le16(buf + FINGER_EV_V1_OFFSET_X); + + y = get_unaligned_le16(buf + FINGER_EV_V1_OFFSET_Y); + y = DIV_ROUND_CLOSEST(y * param->phy_h, param->phy_w); + + /* Refuse incorrect coordinates */ + if (x > param->max_x || y > param->max_y) + return; + + dev_dbg(input->dev.parent, "tip on (%d), x(%d), y(%d)\n", + finger_id, x, y); + + input_mt_slot(input, finger_id); + input_mt_report_slot_state(input, MT_TOOL_FINGER, 1); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, w); + input_report_abs(input, ABS_MT_PRESSURE, p); + input_report_abs(input, ABS_MT_POSITION_X, x); + input_report_abs(input, ABS_MT_POSITION_Y, y); +} + +static irqreturn_t wdt87xx_ts_interrupt(int irq, void *dev_id) +{ + struct wdt87xx_data *wdt = dev_id; + struct i2c_client *client = wdt->client; + int i, fingers; + int error; + u8 raw_buf[WDT_V1_RAW_BUF_COUNT] = {0}; + + error = i2c_master_recv(client, raw_buf, WDT_V1_RAW_BUF_COUNT); + if (error < 0) { + dev_err(&client->dev, "read v1 raw data failed: %d\n", error); + goto irq_exit; + } + + fingers = raw_buf[TOUCH_PK_V1_OFFSET_FNGR_NUM]; + if (!fingers) + goto irq_exit; + + for (i = 0; i < WDT_MAX_FINGER; i++) + wdt87xx_report_contact(wdt->input, + &wdt->param, + &raw_buf[TOUCH_PK_V1_OFFSET_EVENT + + i * FINGER_EV_V1_SIZE]); + + input_mt_sync_frame(wdt->input); + input_sync(wdt->input); + +irq_exit: + return IRQ_HANDLED; +} + +static int wdt87xx_ts_create_input_device(struct wdt87xx_data *wdt) +{ + struct device *dev = &wdt->client->dev; + struct input_dev *input; + unsigned int res = DIV_ROUND_CLOSEST(MAX_UNIT_AXIS, wdt->param.phy_w); + int error; + + input = devm_input_allocate_device(dev); + if (!input) { + dev_err(dev, "failed to allocate input device\n"); + return -ENOMEM; + } + wdt->input = input; + + input->name = "WDT87xx Touchscreen"; + input->id.bustype = BUS_I2C; + input->phys = wdt->phys; + + input_set_abs_params(input, ABS_MT_POSITION_X, 0, + wdt->param.max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, + wdt->param.max_y, 0, 0); + input_abs_set_res(input, ABS_MT_POSITION_X, res); + input_abs_set_res(input, ABS_MT_POSITION_Y, res); + + input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, + 0, wdt->param.max_x, 0, 0); + input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xFF, 0, 0); + + input_mt_init_slots(input, WDT_MAX_FINGER, + INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); + + error = input_register_device(input); + if (error) { + dev_err(dev, "failed to register input device: %d\n", error); + return error; + } + + return 0; +} + +static int wdt87xx_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct wdt87xx_data *wdt; + int error; + + dev_dbg(&client->dev, "adapter=%d, client irq: %d\n", + client->adapter->nr, client->irq); + + /* Check if the I2C function is ok in this adaptor */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENXIO; + + wdt = devm_kzalloc(&client->dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdt->client = client; + mutex_init(&wdt->fw_mutex); + i2c_set_clientdata(client, wdt); + + snprintf(wdt->phys, sizeof(wdt->phys), "i2c-%u-%04x/input0", + client->adapter->nr, client->addr); + + error = wdt87xx_get_sysparam(client, &wdt->param); + if (error) + return error; + + error = wdt87xx_ts_create_input_device(wdt); + if (error) + return error; + + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, wdt87xx_ts_interrupt, + IRQF_ONESHOT, + client->name, wdt); + if (error) { + dev_err(&client->dev, "request irq failed: %d\n", error); + return error; + } + + error = sysfs_create_group(&client->dev.kobj, &wdt87xx_attr_group); + if (error) { + dev_err(&client->dev, "create sysfs failed: %d\n", error); + return error; + } + + return 0; +} + +static int wdt87xx_ts_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &wdt87xx_attr_group); + + return 0; +} + +static int __maybe_unused wdt87xx_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error; + + disable_irq(client->irq); + + error = wdt87xx_send_command(client, VND_CMD_STOP, MODE_IDLE); + if (error) { + enable_irq(client->irq); + dev_err(&client->dev, + "failed to stop device when suspending: %d\n", + error); + return error; + } + + return 0; +} + +static int __maybe_unused wdt87xx_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int error; + + /* + * The chip may have been reset while system is resuming, + * give it some time to settle. + */ + mdelay(100); + + error = wdt87xx_send_command(client, VND_CMD_START, 0); + if (error) + dev_err(&client->dev, + "failed to start device when resuming: %d\n", + error); + + enable_irq(client->irq); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(wdt87xx_pm_ops, wdt87xx_suspend, wdt87xx_resume); + +static const struct i2c_device_id wdt87xx_dev_id[] = { + { WDT87XX_NAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wdt87xx_dev_id); + +static const struct acpi_device_id wdt87xx_acpi_id[] = { + { "WDHT0001", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, wdt87xx_acpi_id); + +static struct i2c_driver wdt87xx_driver = { + .probe = wdt87xx_ts_probe, + .remove = wdt87xx_ts_remove, + .id_table = wdt87xx_dev_id, + .driver = { + .name = WDT87XX_NAME, + .pm = &wdt87xx_pm_ops, + .acpi_match_table = ACPI_PTR(wdt87xx_acpi_id), + }, +}; +module_i2c_driver(wdt87xx_driver); + +MODULE_AUTHOR("HN Chen <hn.chen@weidahitech.com>"); +MODULE_DESCRIPTION("WeidaHiTech WDT87XX Touchscreen driver"); +MODULE_VERSION(WDT87XX_DRV_VER); +MODULE_LICENSE("GPL"); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8d7e1c8b6d56..4dd88264dff5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1055,7 +1055,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; /* diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 4400edd1a6c7..b7d54d428b5e 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -257,16 +257,6 @@ int gic_get_c0_fdc_int(void) return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; } - /* - * Some cores claim the FDC is routable but it doesn't actually seem to - * be connected. - */ - switch (current_cpu_type()) { - case CPU_INTERAPTIV: - case CPU_PROAPTIV: - return -1; - } - return irq_create_mapping(gic_irq_domain, GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC)); } diff --git a/drivers/irqchip/irqchip.h b/drivers/irqchip/irqchip.h index 0f6486d4f1b0..0f67ae32464f 100644 --- a/drivers/irqchip/irqchip.h +++ b/drivers/irqchip/irqchip.h @@ -8,21 +8,4 @@ * warranty of any kind, whether express or implied. */ -#ifndef _IRQCHIP_H -#define _IRQCHIP_H - -#include <linux/of.h> - -/* - * This macro must be used by the different irqchip drivers to declare - * the association between their DT compatible string and their - * initialization function. - * - * @name: name that must be unique accross all IRQCHIP_DECLARE of the - * same file. - * @compstr: compatible string of the irqchip driver - * @fn: initialization function - */ -#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) - -#endif +#include <linux/irqchip.h> diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 135a0907e9de..ed2346ddf4c9 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -839,7 +839,7 @@ static void bitmap_file_kick(struct bitmap *bitmap) if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); if (path) - ptr = d_path(&bitmap->storage.file->f_path, + ptr = file_path(bitmap->storage.file, path, PAGE_SIZE); printk(KERN_ALERT @@ -1927,7 +1927,7 @@ void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) chunk_kb ? "KB" : "B"); if (bitmap->storage.file) { seq_printf(seq, ", file: "); - seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); + seq_file_path(seq, bitmap->storage.file, " \t\n"); } seq_printf(seq, "\n"); diff --git a/drivers/md/md.c b/drivers/md/md.c index df92d30ca054..d429c30cd514 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5766,7 +5766,7 @@ static int get_bitmap_file(struct mddev *mddev, void __user * arg) /* bitmap disabled, zero the first byte and copy out */ if (!mddev->bitmap_info.file) file->pathname[0] = '\0'; - else if ((ptr = d_path(&mddev->bitmap_info.file->f_path, + else if ((ptr = file_path(mddev->bitmap_info.file, file->pathname, sizeof(file->pathname))), IS_ERR(ptr)) err = PTR_ERR(ptr); diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 8911e51d410a..3a27a84ad3ec 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -2074,14 +2074,8 @@ static int gpmc_probe_dt(struct platform_device *pdev) ret = gpmc_probe_nand_child(pdev, child); else if (of_node_cmp(child->name, "onenand") == 0) ret = gpmc_probe_onenand_child(pdev, child); - else if (of_node_cmp(child->name, "ethernet") == 0 || - of_node_cmp(child->name, "nor") == 0 || - of_node_cmp(child->name, "uart") == 0) + else ret = gpmc_probe_generic_child(pdev, child); - - if (WARN(ret < 0, "%s: probing gpmc child %s failed\n", - __func__, child->full_name)) - of_node_put(child); } return 0; diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 0c77240ae2fc..729e0851167d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -23,6 +23,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) afu = cxl_pci_to_afu(dev); + get_device(&afu->dev); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) return ctx; @@ -31,6 +32,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) rc = cxl_context_init(ctx, afu, false, NULL); if (rc) { kfree(ctx); + put_device(&afu->dev); return ERR_PTR(-ENOMEM); } cxl_assign_psn_space(ctx); @@ -60,6 +62,8 @@ int cxl_release_context(struct cxl_context *ctx) if (ctx->status != CLOSED) return -EBUSY; + put_device(&ctx->afu->dev); + cxl_context_free(ctx); return 0; @@ -159,7 +163,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, } ctx->status = STARTED; - get_device(&ctx->afu->dev); out: mutex_unlock(&ctx->status_mutex); return rc; @@ -175,12 +178,7 @@ EXPORT_SYMBOL_GPL(cxl_process_element); /* Stop a context. Returns 0 on success, otherwise -Errno */ int cxl_stop_context(struct cxl_context *ctx) { - int rc; - - rc = __detach_context(ctx); - if (!rc) - put_device(&ctx->afu->dev); - return rc; + return __detach_context(ctx); } EXPORT_SYMBOL_GPL(cxl_stop_context); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 2a4c80ac322a..1287148629c0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } @@ -145,8 +145,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = { */ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) { + u64 start = vma->vm_pgoff << PAGE_SHIFT; u64 len = vma->vm_end - vma->vm_start; - len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + } if (ctx->afu->current_mode != CXL_MODE_DEDICATED) { /* make sure there is a valid per process space for this AFU */ diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 833348e2c9cb..4a164ab8b35a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index c68ef5806dbe..32ad09705949 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -539,7 +539,7 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p1n_mmio) + if (afu->p2n_mmio) iounmap(afu->p2n_mmio); if (afu->p1n_mmio) iounmap(afu->p1n_mmio); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index b1d1983a84a5..2eba002b580b 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -112,9 +112,10 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, unsigned long addr; phb = pci_bus_to_host(bus); - afu = (struct cxl_afu *)phb->private_data; if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + afu = (struct cxl_afu *)phb->private_data; + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; if (offset >= (unsigned long)phb->cfg_data) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 357b6ae4d207..458aa5a09c52 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -552,22 +552,6 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) schedule_work(&device->event_work); } -void mei_cl_bus_remove_devices(struct mei_device *dev) -{ - struct mei_cl *cl, *next; - - mutex_lock(&dev->device_lock); - list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { - if (cl->device) - mei_cl_remove_device(cl->device); - - list_del(&cl->device_link); - mei_cl_unlink(cl); - kfree(cl); - } - mutex_unlock(&dev->device_lock); -} - int __init mei_cl_bus_init(void) { return bus_register(&mei_cl_bus_type); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 94514b2c7a50..00c3865ca3b1 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -333,8 +333,6 @@ void mei_stop(struct mei_device *dev) mei_nfc_host_exit(dev); - mei_cl_bus_remove_devices(dev); - mutex_lock(&dev->device_lock); mei_wd_stop(dev); diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c index b983c4ecad38..290ef3037437 100644 --- a/drivers/misc/mei/nfc.c +++ b/drivers/misc/mei/nfc.c @@ -402,11 +402,12 @@ void mei_nfc_host_exit(struct mei_device *dev) cldev->priv_data = NULL; - mutex_lock(&dev->device_lock); /* Need to remove the device here * since mei_nfc_free will unlink the clients */ mei_cl_remove_device(cldev); + + mutex_lock(&dev->device_lock); mei_nfc_free(ndev); mutex_unlock(&dev->device_lock); } diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 9df2b6801f76..b2b411da297b 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -43,6 +43,7 @@ #include <linux/regulator/consumer.h> #include <linux/pinctrl/consumer.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/platform_data/hsmmc-omap.h> /* OMAP HSMMC Host Controller Registers */ @@ -218,7 +219,6 @@ struct omap_hsmmc_host { unsigned int flags; #define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ #define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ -#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_hsmmc_platform_data *pdata; @@ -1117,22 +1117,6 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) -{ - struct omap_hsmmc_host *host = dev_id; - - /* cirq is level triggered, disable to avoid infinite loop */ - spin_lock(&host->irq_lock); - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } - spin_unlock(&host->irq_lock); - pm_request_resume(host->dev); /* no use counter */ - - return IRQ_HANDLED; -} - static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1665,7 +1649,6 @@ static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) { - struct mmc_host *mmc = host->mmc; int ret; /* @@ -1677,11 +1660,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) if (!host->dev->of_node || !host->wake_irq) return -ENODEV; - /* Prevent auto-enabling of IRQ */ - irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); - ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - mmc_hostname(mmc), host); + ret = dev_pm_set_dedicated_wake_irq(host->dev, host->wake_irq); if (ret) { dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); goto err; @@ -1718,7 +1697,7 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) return 0; err_free_irq: - devm_free_irq(host->dev, host->wake_irq, host); + dev_pm_clear_wake_irq(host->dev); err: dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); host->wake_irq = 0; @@ -2007,6 +1986,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_ops.multi_io_quirk = omap_hsmmc_multi_io_quirk; } + device_init_wakeup(&pdev->dev, true); pm_runtime_enable(host->dev); pm_runtime_get_sync(host->dev); pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY); @@ -2147,6 +2127,7 @@ err_slot_name: if (host->use_reg) omap_hsmmc_reg_put(host); err_irq: + device_init_wakeup(&pdev->dev, false); if (host->tx_chan) dma_release_channel(host->tx_chan); if (host->rx_chan) @@ -2178,6 +2159,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev) pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); + device_init_wakeup(&pdev->dev, false); if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2204,11 +2186,6 @@ static int omap_hsmmc_suspend(struct device *dev) OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } - /* do not wake up due to sdio irq */ - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - disable_irq(host->wake_irq); - if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2233,11 +2210,6 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_conf_bus_power(host); omap_hsmmc_protect_card(host); - - if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && - !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) - enable_irq(host->wake_irq); - pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2277,10 +2249,6 @@ static int omap_hsmmc_runtime_suspend(struct device *dev) } pinctrl_pm_select_idle_state(dev); - - WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); - enable_irq(host->wake_irq); - host->flags |= HSMMC_WAKE_IRQ_ENABLED; } else { pinctrl_pm_select_idle_state(dev); } @@ -2302,11 +2270,6 @@ static int omap_hsmmc_runtime_resume(struct device *dev) spin_lock_irqsave(&host->irq_lock, flags); if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { - /* sdio irq flag can't change while in runtime suspend */ - if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { - disable_irq_nosync(host->wake_irq); - host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; - } pinctrl_pm_select_default_state(host->dev); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 019fceffc9e5..c18f9e62a9fa 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -217,8 +217,8 @@ config NET_POLL_CONTROLLER def_bool NETPOLL config NTB_NETDEV - tristate "Virtual Ethernet over NTB" - depends on NTB + tristate "Virtual Ethernet over NTB Transport" + depends on NTB_TRANSPORT config RIONET tristate "RapidIO Ethernet over messaging driver support" @@ -258,6 +258,20 @@ config TUN If you don't know what to use this for, you don't need it. +config TUN_VNET_CROSS_LE + bool "Support for cross-endian vnet headers on little-endian kernels" + default n + ---help--- + This option allows TUN/TAP and MACVTAP device drivers in a + little-endian kernel to parse vnet headers that come from a + big-endian legacy virtio device. + + Userspace programs can control the feature using the TUNSETVNETBE + and TUNGETVNETBE ioctls. + + Unless you have a little-endian system hosting a big-endian virtual + machine with a legacy virtio NIC, you should say N. + config VETH tristate "Virtual ethernet pair device" ---help--- diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b4b61910f616..3b933bb5a8d5 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -48,15 +48,70 @@ struct macvtap_queue { #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define MACVTAP_VNET_LE 0x80000000 +#define MACVTAP_VNET_BE 0x40000000 + +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_BE ? false : + virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s = !!(q->flags & MACVTAP_VNET_BE); + + if (put_user(s, sp)) + return -EFAULT; + + return 0; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s; + + if (get_user(s, sp)) + return -EFAULT; + + if (s) + q->flags |= MACVTAP_VNET_BE; + else + q->flags &= ~MACVTAP_VNET_BE; + + return 0; +} +#else +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ + +static inline bool macvtap_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_LE || + macvtap_legacy_is_little_endian(q); +} static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) { - return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val); + return __virtio16_to_cpu(macvtap_is_little_endian(q), val); } static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) { - return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val); + return __cpu_to_virtio16(macvtap_is_little_endian(q), val); } static struct proto macvtap_proto = { @@ -1085,6 +1140,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, q->flags &= ~MACVTAP_VNET_LE; return 0; + case TUNGETVNETBE: + return macvtap_get_vnet_be(q, sp); + + case TUNSETVNETBE: + return macvtap_set_vnet_be(q, sp); + case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 5a7e6397440a..3cc316cb7e6b 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Network Linux driver + * PCIe NTB Network Linux driver * * Contact Information: * Jon Mason <jon.mason@intel.com> @@ -50,6 +52,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/ntb.h> +#include <linux/ntb_transport.h> #define NTB_NETDEV_VER "0.7" @@ -70,26 +73,19 @@ struct ntb_netdev { static LIST_HEAD(dev_list); -static void ntb_netdev_event_handler(void *data, int status) +static void ntb_netdev_event_handler(void *data, int link_is_up) { struct net_device *ndev = data; struct ntb_netdev *dev = netdev_priv(ndev); - netdev_dbg(ndev, "Event %x, Link %x\n", status, + netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up, ntb_transport_link_query(dev->qp)); - switch (status) { - case NTB_LINK_DOWN: + if (link_is_up) { + if (ntb_transport_link_query(dev->qp)) + netif_carrier_on(ndev); + } else { netif_carrier_off(ndev); - break; - case NTB_LINK_UP: - if (!ntb_transport_link_query(dev->qp)) - return; - - netif_carrier_on(ndev); - break; - default: - netdev_warn(ndev, "Unsupported event type %d\n", status); } } @@ -160,8 +156,6 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, struct ntb_netdev *dev = netdev_priv(ndev); int rc; - netdev_dbg(ndev, "%s: skb len %d\n", __func__, skb->len); - rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); if (rc) goto err; @@ -322,20 +316,26 @@ static const struct ntb_queue_handlers ntb_netdev_handlers = { .event_handler = ntb_netdev_event_handler, }; -static int ntb_netdev_probe(struct pci_dev *pdev) +static int ntb_netdev_probe(struct device *client_dev) { + struct ntb_dev *ntb; struct net_device *ndev; + struct pci_dev *pdev; struct ntb_netdev *dev; int rc; - ndev = alloc_etherdev(sizeof(struct ntb_netdev)); + ntb = dev_ntb(client_dev->parent); + pdev = ntb->pdev; + if (!pdev) + return -ENODEV; + + ndev = alloc_etherdev(sizeof(*dev)); if (!ndev) return -ENOMEM; dev = netdev_priv(ndev); dev->ndev = ndev; dev->pdev = pdev; - BUG_ON(!dev->pdev); ndev->features = NETIF_F_HIGHDMA; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -349,7 +349,8 @@ static int ntb_netdev_probe(struct pci_dev *pdev) ndev->netdev_ops = &ntb_netdev_ops; ndev->ethtool_ops = &ntb_ethtool_ops; - dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers); + dev->qp = ntb_transport_create_queue(ndev, client_dev, + &ntb_netdev_handlers); if (!dev->qp) { rc = -EIO; goto err; @@ -372,12 +373,17 @@ err: return rc; } -static void ntb_netdev_remove(struct pci_dev *pdev) +static void ntb_netdev_remove(struct device *client_dev) { + struct ntb_dev *ntb; struct net_device *ndev; + struct pci_dev *pdev; struct ntb_netdev *dev; bool found = false; + ntb = dev_ntb(client_dev->parent); + pdev = ntb->pdev; + list_for_each_entry(dev, &dev_list, list) { if (dev->pdev == pdev) { found = true; @@ -396,7 +402,7 @@ static void ntb_netdev_remove(struct pci_dev *pdev) free_netdev(ndev); } -static struct ntb_client ntb_netdev_client = { +static struct ntb_transport_client ntb_netdev_client = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .probe = ntb_netdev_probe, @@ -407,16 +413,16 @@ static int __init ntb_netdev_init_module(void) { int rc; - rc = ntb_register_client_dev(KBUILD_MODNAME); + rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_register_client(&ntb_netdev_client); + return ntb_transport_register_client(&ntb_netdev_client); } module_init(ntb_netdev_init_module); static void __exit ntb_netdev_exit_module(void) { - ntb_unregister_client(&ntb_netdev_client); - ntb_unregister_client_dev(KBUILD_MODNAME); + ntb_transport_unregister_client(&ntb_netdev_client); + ntb_transport_unregister_client_dev(KBUILD_MODNAME); } module_exit(ntb_netdev_exit_module); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1a1c4f7b3ec5..06a039414628 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -111,6 +111,7 @@ do { \ #define TUN_FASYNC IFF_ATTACH_QUEUE /* High bits in flags field are unused. */ #define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE) @@ -205,14 +206,68 @@ struct tun_struct { u32 flow_count; }; +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_BE ? false : + virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be = !!(tun->flags & TUN_VNET_BE); + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + tun->flags |= TUN_VNET_BE; + else + tun->flags &= ~TUN_VNET_BE; + + return 0; +} +#else +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ + +static inline bool tun_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_LE || + tun_legacy_is_little_endian(tun); +} + static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) { - return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val); + return __virtio16_to_cpu(tun_is_little_endian(tun), val); } static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) { - return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val); + return __cpu_to_virtio16(tun_is_little_endian(tun), val); } static inline u32 tun_hashfn(u32 rxhash) @@ -2044,6 +2099,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun->flags &= ~TUN_VNET_LE; break; + case TUNGETVNETBE: + ret = tun_get_vnet_be(tun, argp); + break; + + case TUNSETVNETBE: + ret = tun_set_vnet_be(tun, argp); + break; + case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index f69df793dbe2..95944e52fa36 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -1,13 +1,28 @@ -config NTB - tristate "Intel Non-Transparent Bridge support" - depends on PCI - depends on X86 - help - The PCI-E Non-transparent bridge hardware is a point-to-point PCI-E bus - connecting 2 systems. When configured, writes to the device's PCI - mapped memory will be mirrored to a buffer on the remote system. The - ntb Linux driver uses this point-to-point communication as a method to - transfer data from one system to the other. - - If unsure, say N. +menuconfig NTB + tristate "Non-Transparent Bridge support" + depends on PCI + help + The PCI-E Non-transparent bridge hardware is a point-to-point PCI-E bus + connecting 2 systems. When configured, writes to the device's PCI + mapped memory will be mirrored to a buffer on the remote system. The + ntb Linux driver uses this point-to-point communication as a method to + transfer data from one system to the other. + If unsure, say N. + +if NTB + +source "drivers/ntb/hw/Kconfig" + +source "drivers/ntb/test/Kconfig" + +config NTB_TRANSPORT + tristate "NTB Transport Client" + help + This is a transport driver that enables connected systems to exchange + messages over the ntb hardware. The transport exposes a queue pair api + to client drivers. + + If unsure, say N. + +endif # NTB diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 15cb59fd354e..1921dec1949d 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,3 +1,2 @@ -obj-$(CONFIG_NTB) += ntb.o - -ntb-objs := ntb_hw.o ntb_transport.o +obj-$(CONFIG_NTB) += ntb.o hw/ test/ +obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig new file mode 100644 index 000000000000..4d5535c4cddf --- /dev/null +++ b/drivers/ntb/hw/Kconfig @@ -0,0 +1 @@ +source "drivers/ntb/hw/intel/Kconfig" diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile new file mode 100644 index 000000000000..175d7c92a569 --- /dev/null +++ b/drivers/ntb/hw/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NTB_INTEL) += intel/ diff --git a/drivers/ntb/hw/intel/Kconfig b/drivers/ntb/hw/intel/Kconfig new file mode 100644 index 000000000000..91f995e33ac6 --- /dev/null +++ b/drivers/ntb/hw/intel/Kconfig @@ -0,0 +1,7 @@ +config NTB_INTEL + tristate "Intel Non-Transparent Bridge support" + depends on X86_64 + help + This driver supports Intel NTB on capable Xeon and Atom hardware. + + If unsure, say N. diff --git a/drivers/ntb/hw/intel/Makefile b/drivers/ntb/hw/intel/Makefile new file mode 100644 index 000000000000..1b434568d2ad --- /dev/null +++ b/drivers/ntb/hw/intel/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NTB_INTEL) += ntb_hw_intel.o diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c new file mode 100644 index 000000000000..87751cfd6f4f --- /dev/null +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -0,0 +1,2274 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * BSD LICENSE + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel PCIe NTB Linux driver + * + * Contact Information: + * Jon Mason <jon.mason@intel.com> + */ + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/ntb.h> + +#include "ntb_hw_intel.h" + +#define NTB_NAME "ntb_hw_intel" +#define NTB_DESC "Intel(R) PCI-E Non-Transparent Bridge Driver" +#define NTB_VER "2.0" + +MODULE_DESCRIPTION(NTB_DESC); +MODULE_VERSION(NTB_VER); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +#define bar0_off(base, bar) ((base) + ((bar) << 2)) +#define bar2_off(base, bar) bar0_off(base, (bar) - 2) + +static const struct intel_ntb_reg atom_reg; +static const struct intel_ntb_alt_reg atom_pri_reg; +static const struct intel_ntb_alt_reg atom_sec_reg; +static const struct intel_ntb_alt_reg atom_b2b_reg; +static const struct intel_ntb_xlat_reg atom_pri_xlat; +static const struct intel_ntb_xlat_reg atom_sec_xlat; +static const struct intel_ntb_reg xeon_reg; +static const struct intel_ntb_alt_reg xeon_pri_reg; +static const struct intel_ntb_alt_reg xeon_sec_reg; +static const struct intel_ntb_alt_reg xeon_b2b_reg; +static const struct intel_ntb_xlat_reg xeon_pri_xlat; +static const struct intel_ntb_xlat_reg xeon_sec_xlat; +static struct intel_b2b_addr xeon_b2b_usd_addr; +static struct intel_b2b_addr xeon_b2b_dsd_addr; +static const struct ntb_dev_ops intel_ntb_ops; + +static const struct file_operations intel_ntb_debugfs_info; +static struct dentry *debugfs_dir; + +static int b2b_mw_idx = -1; +module_param(b2b_mw_idx, int, 0644); +MODULE_PARM_DESC(b2b_mw_idx, "Use this mw idx to access the peer ntb. A " + "value of zero or positive starts from first mw idx, and a " + "negative value starts from last mw idx. Both sides MUST " + "set the same value here!"); + +static unsigned int b2b_mw_share; +module_param(b2b_mw_share, uint, 0644); +MODULE_PARM_DESC(b2b_mw_share, "If the b2b mw is large enough, configure the " + "ntb so that the peer ntb only occupies the first half of " + "the mw, so the second half can still be used as a mw. Both " + "sides MUST set the same value here!"); + +module_param_named(xeon_b2b_usd_bar2_addr64, + xeon_b2b_usd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD BAR 2 64-bit address"); + +module_param_named(xeon_b2b_usd_bar4_addr64, + xeon_b2b_usd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD BAR 4 64-bit address"); + +module_param_named(xeon_b2b_usd_bar4_addr32, + xeon_b2b_usd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD split-BAR 4 32-bit address"); + +module_param_named(xeon_b2b_usd_bar5_addr32, + xeon_b2b_usd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, + "XEON B2B USD split-BAR 5 32-bit address"); + +module_param_named(xeon_b2b_dsd_bar2_addr64, + xeon_b2b_dsd_addr.bar2_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD BAR 2 64-bit address"); + +module_param_named(xeon_b2b_dsd_bar4_addr64, + xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD BAR 4 64-bit address"); + +module_param_named(xeon_b2b_dsd_bar4_addr32, + xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD split-BAR 4 32-bit address"); + +module_param_named(xeon_b2b_dsd_bar5_addr32, + xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644); +MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, + "XEON B2B DSD split-BAR 5 32-bit address"); + +#ifndef ioread64 +#ifdef readq +#define ioread64 readq +#else +#define ioread64 _ioread64 +static inline u64 _ioread64(void __iomem *mmio) +{ + u64 low, high; + + low = ioread32(mmio); + high = ioread32(mmio + sizeof(u32)); + return low | (high << 32); +} +#endif +#endif + +#ifndef iowrite64 +#ifdef writeq +#define iowrite64 writeq +#else +#define iowrite64 _iowrite64 +static inline void _iowrite64(u64 val, void __iomem *mmio) +{ + iowrite32(val, mmio); + iowrite32(val >> 32, mmio + sizeof(u32)); +} +#endif +#endif + +static inline int pdev_is_atom(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: + return 1; + } + return 0; +} + +static inline int pdev_is_xeon(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + return 1; + } + return 0; +} + +static inline void ndev_reset_unsafe_flags(struct intel_ntb_dev *ndev) +{ + ndev->unsafe_flags = 0; + ndev->unsafe_flags_ignore = 0; + + /* Only B2B has a workaround to avoid SDOORBELL */ + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) + if (!ntb_topo_is_b2b(ndev->ntb.topo)) + ndev->unsafe_flags |= NTB_UNSAFE_DB; + + /* No low level workaround to avoid SB01BASE */ + if (ndev->hwerr_flags & NTB_HWERR_SB01BASE_LOCKUP) { + ndev->unsafe_flags |= NTB_UNSAFE_DB; + ndev->unsafe_flags |= NTB_UNSAFE_SPAD; + } +} + +static inline int ndev_is_unsafe(struct intel_ntb_dev *ndev, + unsigned long flag) +{ + return !!(flag & ndev->unsafe_flags & ~ndev->unsafe_flags_ignore); +} + +static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev, + unsigned long flag) +{ + flag &= ndev->unsafe_flags; + ndev->unsafe_flags_ignore |= flag; + + return !!flag; +} + +static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) +{ + if (idx < 0 || idx > ndev->mw_count) + return -EINVAL; + return ndev->reg->mw_bar[idx]; +} + +static inline int ndev_db_addr(struct intel_ntb_dev *ndev, + phys_addr_t *db_addr, resource_size_t *db_size, + phys_addr_t reg_addr, unsigned long reg) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); + + if (db_addr) { + *db_addr = reg_addr + reg; + dev_dbg(ndev_dev(ndev), "Peer db addr %llx\n", *db_addr); + } + + if (db_size) { + *db_size = ndev->reg->db_size; + dev_dbg(ndev_dev(ndev), "Peer db size %llx\n", *db_size); + } + + return 0; +} + +static inline u64 ndev_db_read(struct intel_ntb_dev *ndev, + void __iomem *mmio) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); + + return ndev->reg->db_ioread(mmio); +} + +static inline int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + ndev->reg->db_iowrite(db_bits, mmio); + + return 0; +} + +static inline int ndev_db_set_mask(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + unsigned long irqflags; + + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + spin_lock_irqsave(&ndev->db_mask_lock, irqflags); + { + ndev->db_mask |= db_bits; + ndev->reg->db_iowrite(ndev->db_mask, mmio); + } + spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags); + + return 0; +} + +static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits, + void __iomem *mmio) +{ + unsigned long irqflags; + + if (ndev_is_unsafe(ndev, NTB_UNSAFE_DB)) + pr_warn_once("%s: NTB unsafe doorbell access", __func__); + + if (db_bits & ~ndev->db_valid_mask) + return -EINVAL; + + spin_lock_irqsave(&ndev->db_mask_lock, irqflags); + { + ndev->db_mask &= ~db_bits; + ndev->reg->db_iowrite(ndev->db_mask, mmio); + } + spin_unlock_irqrestore(&ndev->db_mask_lock, irqflags); + + return 0; +} + +static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) +{ + u64 shift, mask; + + shift = ndev->db_vec_shift; + mask = BIT_ULL(shift) - 1; + + return mask << (shift * db_vector); +} + +static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx, + phys_addr_t *spad_addr, phys_addr_t reg_addr, + unsigned long reg) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); + + if (idx < 0 || idx >= ndev->spad_count) + return -EINVAL; + + if (spad_addr) { + *spad_addr = reg_addr + reg + (idx << 2); + dev_dbg(ndev_dev(ndev), "Peer spad addr %llx\n", *spad_addr); + } + + return 0; +} + +static inline u32 ndev_spad_read(struct intel_ntb_dev *ndev, int idx, + void __iomem *mmio) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); + + if (idx < 0 || idx >= ndev->spad_count) + return 0; + + return ioread32(mmio + (idx << 2)); +} + +static inline int ndev_spad_write(struct intel_ntb_dev *ndev, int idx, u32 val, + void __iomem *mmio) +{ + if (ndev_is_unsafe(ndev, NTB_UNSAFE_SPAD)) + pr_warn_once("%s: NTB unsafe scratchpad access", __func__); + + if (idx < 0 || idx >= ndev->spad_count) + return -EINVAL; + + iowrite32(val, mmio + (idx << 2)); + + return 0; +} + +static irqreturn_t ndev_interrupt(struct intel_ntb_dev *ndev, int vec) +{ + u64 vec_mask; + + vec_mask = ndev_vec_mask(ndev, vec); + + dev_dbg(ndev_dev(ndev), "vec %d vec_mask %llx\n", vec, vec_mask); + + ndev->last_ts = jiffies; + + if (vec_mask & ndev->db_link_mask) { + if (ndev->reg->poll_link(ndev)) + ntb_link_event(&ndev->ntb); + } + + if (vec_mask & ndev->db_valid_mask) + ntb_db_event(&ndev->ntb, vec); + + return IRQ_HANDLED; +} + +static irqreturn_t ndev_vec_isr(int irq, void *dev) +{ + struct intel_ntb_vec *nvec = dev; + + return ndev_interrupt(nvec->ndev, nvec->num); +} + +static irqreturn_t ndev_irq_isr(int irq, void *dev) +{ + struct intel_ntb_dev *ndev = dev; + + return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq); +} + +static int ndev_init_isr(struct intel_ntb_dev *ndev, + int msix_min, int msix_max, + int msix_shift, int total_shift) +{ + struct pci_dev *pdev; + int rc, i, msix_count, node; + + pdev = ndev_pdev(ndev); + + node = dev_to_node(&pdev->dev); + + /* Mask all doorbell interrupts */ + ndev->db_mask = ndev->db_valid_mask; + ndev->reg->db_iowrite(ndev->db_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + /* Try to set up msix irq */ + + ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec), + GFP_KERNEL, node); + if (!ndev->vec) + goto err_msix_vec_alloc; + + ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix), + GFP_KERNEL, node); + if (!ndev->msix) + goto err_msix_alloc; + + for (i = 0; i < msix_max; ++i) + ndev->msix[i].entry = i; + + msix_count = pci_enable_msix_range(pdev, ndev->msix, + msix_min, msix_max); + if (msix_count < 0) + goto err_msix_enable; + + for (i = 0; i < msix_count; ++i) { + ndev->vec[i].ndev = ndev; + ndev->vec[i].num = i; + rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0, + "ndev_vec_isr", &ndev->vec[i]); + if (rc) + goto err_msix_request; + } + + dev_dbg(ndev_dev(ndev), "Using msix interrupts\n"); + ndev->db_vec_count = msix_count; + ndev->db_vec_shift = msix_shift; + return 0; + +err_msix_request: + while (i-- > 0) + free_irq(ndev->msix[i].vector, ndev); + pci_disable_msix(pdev); +err_msix_enable: + kfree(ndev->msix); +err_msix_alloc: + kfree(ndev->vec); +err_msix_vec_alloc: + ndev->msix = NULL; + ndev->vec = NULL; + + /* Try to set up msi irq */ + + rc = pci_enable_msi(pdev); + if (rc) + goto err_msi_enable; + + rc = request_irq(pdev->irq, ndev_irq_isr, 0, + "ndev_irq_isr", ndev); + if (rc) + goto err_msi_request; + + dev_dbg(ndev_dev(ndev), "Using msi interrupts\n"); + ndev->db_vec_count = 1; + ndev->db_vec_shift = total_shift; + return 0; + +err_msi_request: + pci_disable_msi(pdev); +err_msi_enable: + + /* Try to set up intx irq */ + + pci_intx(pdev, 1); + + rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED, + "ndev_irq_isr", ndev); + if (rc) + goto err_intx_request; + + dev_dbg(ndev_dev(ndev), "Using intx interrupts\n"); + ndev->db_vec_count = 1; + ndev->db_vec_shift = total_shift; + return 0; + +err_intx_request: + return rc; +} + +static void ndev_deinit_isr(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev; + int i; + + pdev = ndev_pdev(ndev); + + /* Mask all doorbell interrupts */ + ndev->db_mask = ndev->db_valid_mask; + ndev->reg->db_iowrite(ndev->db_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + if (ndev->msix) { + i = ndev->db_vec_count; + while (i--) + free_irq(ndev->msix[i].vector, &ndev->vec[i]); + pci_disable_msix(pdev); + kfree(ndev->msix); + kfree(ndev->vec); + } else { + free_irq(pdev->irq, ndev); + if (pci_dev_msi_enabled(pdev)) + pci_disable_msi(pdev); + } +} + +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct intel_ntb_dev *ndev; + void __iomem *mmio; + char *buf; + size_t buf_size; + ssize_t ret, off; + union { u64 v64; u32 v32; u16 v16; } u; + + ndev = filp->private_data; + mmio = ndev->self_mmio; + + buf_size = min(count, 0x800ul); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + off = 0; + + off += scnprintf(buf + off, buf_size - off, + "NTB Device Information:\n"); + + off += scnprintf(buf + off, buf_size - off, + "Connection Topology -\t%s\n", + ntb_topo_string(ndev->ntb.topo)); + + off += scnprintf(buf + off, buf_size - off, + "B2B Offset -\t\t%#lx\n", ndev->b2b_off); + off += scnprintf(buf + off, buf_size - off, + "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); + off += scnprintf(buf + off, buf_size - off, + "BAR4 Split -\t\t%s\n", + ndev->bar4_split ? "yes" : "no"); + + off += scnprintf(buf + off, buf_size - off, + "NTB CTL -\t\t%#06x\n", ndev->ntb_ctl); + off += scnprintf(buf + off, buf_size - off, + "LNK STA -\t\t%#06x\n", ndev->lnk_sta); + + if (!ndev->reg->link_is_up(ndev)) { + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tDown\n"); + } else { + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tUp\n"); + off += scnprintf(buf + off, buf_size - off, + "Link Speed -\t\tPCI-E Gen %u\n", + NTB_LNK_STA_SPEED(ndev->lnk_sta)); + off += scnprintf(buf + off, buf_size - off, + "Link Width -\t\tx%u\n", + NTB_LNK_STA_WIDTH(ndev->lnk_sta)); + } + + off += scnprintf(buf + off, buf_size - off, + "Memory Window Count -\t%u\n", ndev->mw_count); + off += scnprintf(buf + off, buf_size - off, + "Scratchpad Count -\t%u\n", ndev->spad_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Count -\t%u\n", ndev->db_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Count -\t%u\n", ndev->db_vec_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Shift -\t%u\n", ndev->db_vec_shift); + + off += scnprintf(buf + off, buf_size - off, + "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Link Mask -\t%#llx\n", ndev->db_link_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask Cached -\t%#llx\n", ndev->db_mask); + + u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask -\t\t%#llx\n", u.v64); + + u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_bell); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Bell -\t\t%#llx\n", u.v64); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Incoming XLAT:\n"); + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 2)); + off += scnprintf(buf + off, buf_size - off, + "XLAT23 -\t\t%#018llx\n", u.v64); + + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); + off += scnprintf(buf + off, buf_size - off, + "XLAT4 -\t\t\t%#06x\n", u.v32); + + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 5)); + off += scnprintf(buf + off, buf_size - off, + "XLAT5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_xlat, 4)); + off += scnprintf(buf + off, buf_size - off, + "XLAT45 -\t\t%#018llx\n", u.v64); + } + + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 2)); + off += scnprintf(buf + off, buf_size - off, + "LMT23 -\t\t\t%#018llx\n", u.v64); + + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); + off += scnprintf(buf + off, buf_size - off, + "LMT4 -\t\t\t%#06x\n", u.v32); + u.v32 = ioread32(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 5)); + off += scnprintf(buf + off, buf_size - off, + "LMT5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + bar2_off(ndev->xlat_reg->bar2_limit, 4)); + off += scnprintf(buf + off, buf_size - off, + "LMT45 -\t\t\t%#018llx\n", u.v64); + } + + if (pdev_is_xeon(ndev->ntb.pdev)) { + if (ntb_topo_is_b2b(ndev->ntb.topo)) { + off += scnprintf(buf + off, buf_size - off, + "\nNTB Outgoing B2B XLAT:\n"); + + u.v64 = ioread64(mmio + XEON_PBAR23XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT23 -\t\t%#018llx\n", u.v64); + + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_PBAR4XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT4 -\t\t%#06x\n", + u.v32); + u.v32 = ioread32(mmio + XEON_PBAR5XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT5 -\t\t%#06x\n", + u.v32); + } else { + u.v64 = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B XLAT45 -\t\t%#018llx\n", + u.v64); + } + + u.v64 = ioread64(mmio + XEON_PBAR23LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT23 -\t\t%#018llx\n", u.v64); + + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_PBAR4LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT4 -\t\t%#06x\n", + u.v32); + u.v32 = ioread32(mmio + XEON_PBAR5LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT5 -\t\t%#06x\n", + u.v32); + } else { + u.v64 = ioread64(mmio + XEON_PBAR45LMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "B2B LMT45 -\t\t%#018llx\n", + u.v64); + } + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Secondary BAR:\n"); + + u.v64 = ioread64(mmio + XEON_SBAR0BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR01 -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + XEON_SBAR23BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR23 -\t\t%#018llx\n", u.v64); + + if (ndev->bar4_split) { + u.v32 = ioread32(mmio + XEON_SBAR4BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR4 -\t\t\t%#06x\n", u.v32); + u.v32 = ioread32(mmio + XEON_SBAR5BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR5 -\t\t\t%#06x\n", u.v32); + } else { + u.v64 = ioread64(mmio + XEON_SBAR45BASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "SBAR45 -\t\t%#018llx\n", + u.v64); + } + } + + off += scnprintf(buf + off, buf_size - off, + "\nXEON NTB Statistics:\n"); + + u.v16 = ioread16(mmio + XEON_USMEMMISS_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "Upstream Memory Miss -\t%u\n", u.v16); + + off += scnprintf(buf + off, buf_size - off, + "\nXEON NTB Hardware Errors:\n"); + + if (!pci_read_config_word(ndev->ntb.pdev, + XEON_DEVSTS_OFFSET, &u.v16)) + off += scnprintf(buf + off, buf_size - off, + "DEVSTS -\t\t%#06x\n", u.v16); + + if (!pci_read_config_word(ndev->ntb.pdev, + XEON_LINK_STATUS_OFFSET, &u.v16)) + off += scnprintf(buf + off, buf_size - off, + "LNKSTS -\t\t%#06x\n", u.v16); + + if (!pci_read_config_dword(ndev->ntb.pdev, + XEON_UNCERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "UNCERRSTS -\t\t%#06x\n", u.v32); + + if (!pci_read_config_dword(ndev->ntb.pdev, + XEON_CORERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "CORERRSTS -\t\t%#06x\n", u.v32); + } + + ret = simple_read_from_buffer(ubuf, count, offp, buf, off); + kfree(buf); + return ret; +} + +static void ndev_init_debugfs(struct intel_ntb_dev *ndev) +{ + if (!debugfs_dir) { + ndev->debugfs_dir = NULL; + ndev->debugfs_info = NULL; + } else { + ndev->debugfs_dir = + debugfs_create_dir(ndev_name(ndev), debugfs_dir); + if (!ndev->debugfs_dir) + ndev->debugfs_info = NULL; + else + ndev->debugfs_info = + debugfs_create_file("info", S_IRUSR, + ndev->debugfs_dir, ndev, + &intel_ntb_debugfs_info); + } +} + +static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev) +{ + debugfs_remove_recursive(ndev->debugfs_dir); +} + +static int intel_ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb_ndev(ntb)->mw_count; +} + +static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, + resource_size_t *size, + resource_size_t *align, + resource_size_t *align_size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + int bar; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + if (base) + *base = pci_resource_start(ndev->ntb.pdev, bar) + + (idx == ndev->b2b_idx ? ndev->b2b_off : 0); + + if (size) + *size = pci_resource_len(ndev->ntb.pdev, bar) - + (idx == ndev->b2b_idx ? ndev->b2b_off : 0); + + if (align) + *align = pci_resource_len(ndev->ntb.pdev, bar); + + if (align_size) + *align_size = 1; + + return 0; +} + +static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + unsigned long base_reg, xlat_reg, limit_reg; + resource_size_t bar_size, mw_size; + void __iomem *mmio; + u64 base, limit, reg_val; + int bar; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + bar_size = pci_resource_len(ndev->ntb.pdev, bar); + + if (idx == ndev->b2b_idx) + mw_size = bar_size - ndev->b2b_off; + else + mw_size = bar_size; + + /* hardware requires that addr is aligned to bar size */ + if (addr & (bar_size - 1)) + return -EINVAL; + + /* make sure the range fits in the usable mw size */ + if (size > mw_size) + return -EINVAL; + + mmio = ndev->self_mmio; + base_reg = bar0_off(ndev->xlat_reg->bar0_base, bar); + xlat_reg = bar2_off(ndev->xlat_reg->bar2_xlat, bar); + limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar); + + if (bar < 4 || !ndev->bar4_split) { + base = ioread64(mmio + base_reg); + + /* Set the limit if supported, if size is not mw_size */ + if (limit_reg && size != mw_size) + limit = base + size; + else + limit = 0; + + /* set and verify setting the translation address */ + iowrite64(addr, mmio + xlat_reg); + reg_val = ioread64(mmio + xlat_reg); + if (reg_val != addr) { + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + + /* set and verify setting the limit */ + iowrite64(limit, mmio + limit_reg); + reg_val = ioread64(mmio + limit_reg); + if (reg_val != limit) { + iowrite64(base, mmio + limit_reg); + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + } else { + /* split bar addr range must all be 32 bit */ + if (addr & (~0ull << 32)) + return -EINVAL; + if ((addr + size) & (~0ull << 32)) + return -EINVAL; + + base = ioread32(mmio + base_reg); + + /* Set the limit if supported, if size is not mw_size */ + if (limit_reg && size != mw_size) + limit = base + size; + else + limit = 0; + + /* set and verify setting the translation address */ + iowrite32(addr, mmio + xlat_reg); + reg_val = ioread32(mmio + xlat_reg); + if (reg_val != addr) { + iowrite32(0, mmio + xlat_reg); + return -EIO; + } + + /* set and verify setting the limit */ + iowrite32(limit, mmio + limit_reg); + reg_val = ioread32(mmio + limit_reg); + if (reg_val != limit) { + iowrite32(base, mmio + limit_reg); + iowrite32(0, mmio + xlat_reg); + return -EIO; + } + } + + return 0; +} + +static int intel_ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, + enum ntb_width *width) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + if (ndev->reg->link_is_up(ndev)) { + if (speed) + *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta); + if (width) + *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta); + return 1; + } else { + /* TODO MAYBE: is it possible to observe the link speed and + * width while link is training? */ + if (speed) + *speed = NTB_SPEED_NONE; + if (width) + *width = NTB_WIDTH_NONE; + return 0; + } +} + +static int intel_ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + struct intel_ntb_dev *ndev; + u32 ntb_ctl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + if (ndev->ntb.topo == NTB_TOPO_SEC) + return -EINVAL; + + dev_dbg(ndev_dev(ndev), + "Enabling link with max_speed %d max_width %d\n", + max_speed, max_width); + if (max_speed != NTB_SPEED_AUTO) + dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed); + if (max_width != NTB_WIDTH_AUTO) + dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width); + + ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK); + ntb_ctl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP; + ntb_ctl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP; + if (ndev->bar4_split) + ntb_ctl |= NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP; + iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); + + return 0; +} + +static int intel_ntb_link_disable(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + u32 ntb_cntl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + if (ndev->ntb.topo == NTB_TOPO_SEC) + return -EINVAL; + + dev_dbg(ndev_dev(ndev), "Disabling link\n"); + + /* Bring NTB link down */ + ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_cntl &= ~(NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP); + ntb_cntl &= ~(NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP); + if (ndev->bar4_split) + ntb_cntl &= ~(NTB_CTL_P2S_BAR5_SNOOP | NTB_CTL_S2P_BAR5_SNOOP); + ntb_cntl |= NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK; + iowrite32(ntb_cntl, ndev->self_mmio + ndev->reg->ntb_ctl); + + return 0; +} + +static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB); +} + +static u64 intel_ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb_ndev(ntb)->db_valid_mask; +} + +static int intel_ntb_db_vector_count(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + return ndev->db_vec_count; +} + +static u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + if (db_vector < 0 || db_vector > ndev->db_vec_count) + return 0; + + return ndev->db_valid_mask & ndev_vec_mask(ndev, db_vector); +} + +static u64 intel_ntb_db_read(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_read(ndev, + ndev->self_mmio + + ndev->self_reg->db_bell); +} + +static int intel_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_write(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_bell); +} + +static int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_set_mask(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_mask); +} + +static int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_clear_mask(ndev, db_bits, + ndev->self_mmio + + ndev->self_reg->db_mask); +} + +static int intel_ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr, + ndev->peer_reg->db_bell); +} + +static int intel_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_db_write(ndev, db_bits, + ndev->peer_mmio + + ndev->peer_reg->db_bell); +} + +static int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_SPAD); +} + +static int intel_ntb_spad_count(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + return ndev->spad_count; +} + +static u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_read(ndev, idx, + ndev->self_mmio + + ndev->self_reg->spad); +} + +static int intel_ntb_spad_write(struct ntb_dev *ntb, + int idx, u32 val) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_write(ndev, idx, val, + ndev->self_mmio + + ndev->self_reg->spad); +} + +static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_addr(ndev, idx, spad_addr, ndev->peer_addr, + ndev->peer_reg->spad); +} + +static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_read(ndev, idx, + ndev->peer_mmio + + ndev->peer_reg->spad); +} + +static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, + int idx, u32 val) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + + return ndev_spad_write(ndev, idx, val, + ndev->peer_mmio + + ndev->peer_reg->spad); +} + +/* ATOM */ + +static u64 atom_db_ioread(void __iomem *mmio) +{ + return ioread64(mmio); +} + +static void atom_db_iowrite(u64 bits, void __iomem *mmio) +{ + iowrite64(bits, mmio); +} + +static int atom_poll_link(struct intel_ntb_dev *ndev) +{ + u32 ntb_ctl; + + ntb_ctl = ioread32(ndev->self_mmio + ATOM_NTBCNTL_OFFSET); + + if (ntb_ctl == ndev->ntb_ctl) + return 0; + + ndev->ntb_ctl = ntb_ctl; + + ndev->lnk_sta = ioread32(ndev->self_mmio + ATOM_LINK_STATUS_OFFSET); + + return 1; +} + +static int atom_link_is_up(struct intel_ntb_dev *ndev) +{ + return ATOM_NTB_CTL_ACTIVE(ndev->ntb_ctl); +} + +static int atom_link_is_err(struct intel_ntb_dev *ndev) +{ + if (ioread32(ndev->self_mmio + ATOM_LTSSMSTATEJMP_OFFSET) + & ATOM_LTSSMSTATEJMP_FORCEDETECT) + return 1; + + if (ioread32(ndev->self_mmio + ATOM_IBSTERRRCRVSTS0_OFFSET) + & ATOM_IBIST_ERR_OFLOW) + return 1; + + return 0; +} + +static inline enum ntb_topo atom_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +{ + switch (ppd & ATOM_PPD_TOPO_MASK) { + case ATOM_PPD_TOPO_B2B_USD: + dev_dbg(ndev_dev(ndev), "PPD %d B2B USD\n", ppd); + return NTB_TOPO_B2B_USD; + + case ATOM_PPD_TOPO_B2B_DSD: + dev_dbg(ndev_dev(ndev), "PPD %d B2B DSD\n", ppd); + return NTB_TOPO_B2B_DSD; + + case ATOM_PPD_TOPO_PRI_USD: + case ATOM_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + case ATOM_PPD_TOPO_SEC_USD: + case ATOM_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + dev_dbg(ndev_dev(ndev), "PPD %d non B2B disabled\n", ppd); + return NTB_TOPO_NONE; + } + + dev_dbg(ndev_dev(ndev), "PPD %d invalid\n", ppd); + return NTB_TOPO_NONE; +} + +static void atom_link_hb(struct work_struct *work) +{ + struct intel_ntb_dev *ndev = hb_ndev(work); + unsigned long poll_ts; + void __iomem *mmio; + u32 status32; + + poll_ts = ndev->last_ts + ATOM_LINK_HB_TIMEOUT; + + /* Delay polling the link status if an interrupt was received, + * unless the cached link status says the link is down. + */ + if (time_after(poll_ts, jiffies) && atom_link_is_up(ndev)) { + schedule_delayed_work(&ndev->hb_timer, poll_ts - jiffies); + return; + } + + if (atom_poll_link(ndev)) + ntb_link_event(&ndev->ntb); + + if (atom_link_is_up(ndev) || !atom_link_is_err(ndev)) { + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); + return; + } + + /* Link is down with error: recover the link! */ + + mmio = ndev->self_mmio; + + /* Driver resets the NTB ModPhy lanes - magic! */ + iowrite8(0xe0, mmio + ATOM_MODPHY_PCSREG6); + iowrite8(0x40, mmio + ATOM_MODPHY_PCSREG4); + iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG4); + iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG6); + + /* Driver waits 100ms to allow the NTB ModPhy to settle */ + msleep(100); + + /* Clear AER Errors, write to clear */ + status32 = ioread32(mmio + ATOM_ERRCORSTS_OFFSET); + dev_dbg(ndev_dev(ndev), "ERRCORSTS = %x\n", status32); + status32 &= PCI_ERR_COR_REP_ROLL; + iowrite32(status32, mmio + ATOM_ERRCORSTS_OFFSET); + + /* Clear unexpected electrical idle event in LTSSM, write to clear */ + status32 = ioread32(mmio + ATOM_LTSSMERRSTS0_OFFSET); + dev_dbg(ndev_dev(ndev), "LTSSMERRSTS0 = %x\n", status32); + status32 |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; + iowrite32(status32, mmio + ATOM_LTSSMERRSTS0_OFFSET); + + /* Clear DeSkew Buffer error, write to clear */ + status32 = ioread32(mmio + ATOM_DESKEWSTS_OFFSET); + dev_dbg(ndev_dev(ndev), "DESKEWSTS = %x\n", status32); + status32 |= ATOM_DESKEWSTS_DBERR; + iowrite32(status32, mmio + ATOM_DESKEWSTS_OFFSET); + + status32 = ioread32(mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); + dev_dbg(ndev_dev(ndev), "IBSTERRRCRVSTS0 = %x\n", status32); + status32 &= ATOM_IBIST_ERR_OFLOW; + iowrite32(status32, mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); + + /* Releases the NTB state machine to allow the link to retrain */ + status32 = ioread32(mmio + ATOM_LTSSMSTATEJMP_OFFSET); + dev_dbg(ndev_dev(ndev), "LTSSMSTATEJMP = %x\n", status32); + status32 &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; + iowrite32(status32, mmio + ATOM_LTSSMSTATEJMP_OFFSET); + + /* There is a potential race between the 2 NTB devices recovering at the + * same time. If the times are the same, the link will not recover and + * the driver will be stuck in this loop forever. Add a random interval + * to the recovery time to prevent this race. + */ + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_RECOVERY_TIME + + prandom_u32() % ATOM_LINK_RECOVERY_TIME); +} + +static int atom_init_isr(struct intel_ntb_dev *ndev) +{ + int rc; + + rc = ndev_init_isr(ndev, 1, ATOM_DB_MSIX_VECTOR_COUNT, + ATOM_DB_MSIX_VECTOR_SHIFT, ATOM_DB_TOTAL_SHIFT); + if (rc) + return rc; + + /* ATOM doesn't have link status interrupt, poll on that platform */ + ndev->last_ts = jiffies; + INIT_DELAYED_WORK(&ndev->hb_timer, atom_link_hb); + schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); + + return 0; +} + +static void atom_deinit_isr(struct intel_ntb_dev *ndev) +{ + cancel_delayed_work_sync(&ndev->hb_timer); + ndev_deinit_isr(ndev); +} + +static int atom_init_ntb(struct intel_ntb_dev *ndev) +{ + ndev->mw_count = ATOM_MW_COUNT; + ndev->spad_count = ATOM_SPAD_COUNT; + ndev->db_count = ATOM_DB_COUNT; + + switch (ndev->ntb.topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + ndev->self_reg = &atom_pri_reg; + ndev->peer_reg = &atom_b2b_reg; + ndev->xlat_reg = &atom_sec_xlat; + + /* Enable Bus Master and Memory Space on the secondary side */ + iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, + ndev->self_mmio + ATOM_SPCICMD_OFFSET); + + break; + + default: + return -EINVAL; + } + + ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + + return 0; +} + +static int atom_init_dev(struct intel_ntb_dev *ndev) +{ + u32 ppd; + int rc; + + rc = pci_read_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, &ppd); + if (rc) + return -EIO; + + ndev->ntb.topo = atom_ppd_topo(ndev, ppd); + if (ndev->ntb.topo == NTB_TOPO_NONE) + return -EINVAL; + + rc = atom_init_ntb(ndev); + if (rc) + return rc; + + rc = atom_init_isr(ndev); + if (rc) + return rc; + + if (ndev->ntb.topo != NTB_TOPO_SEC) { + /* Initiate PCI-E link training */ + rc = pci_write_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, + ppd | ATOM_PPD_INIT_LINK); + if (rc) + return rc; + } + + return 0; +} + +static void atom_deinit_dev(struct intel_ntb_dev *ndev) +{ + atom_deinit_isr(ndev); +} + +/* XEON */ + +static u64 xeon_db_ioread(void __iomem *mmio) +{ + return (u64)ioread16(mmio); +} + +static void xeon_db_iowrite(u64 bits, void __iomem *mmio) +{ + iowrite16((u16)bits, mmio); +} + +static int xeon_poll_link(struct intel_ntb_dev *ndev) +{ + u16 reg_val; + int rc; + + ndev->reg->db_iowrite(ndev->db_link_mask, + ndev->self_mmio + + ndev->self_reg->db_bell); + + rc = pci_read_config_word(ndev->ntb.pdev, + XEON_LINK_STATUS_OFFSET, ®_val); + if (rc) + return 0; + + if (reg_val == ndev->lnk_sta) + return 0; + + ndev->lnk_sta = reg_val; + + return 1; +} + +static int xeon_link_is_up(struct intel_ntb_dev *ndev) +{ + if (ndev->ntb.topo == NTB_TOPO_SEC) + return 1; + + return NTB_LNK_STA_ACTIVE(ndev->lnk_sta); +} + +static inline enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd) +{ + switch (ppd & XEON_PPD_TOPO_MASK) { + case XEON_PPD_TOPO_B2B_USD: + return NTB_TOPO_B2B_USD; + + case XEON_PPD_TOPO_B2B_DSD: + return NTB_TOPO_B2B_DSD; + + case XEON_PPD_TOPO_PRI_USD: + case XEON_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ + return NTB_TOPO_PRI; + + case XEON_PPD_TOPO_SEC_USD: + case XEON_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ + return NTB_TOPO_SEC; + } + + return NTB_TOPO_NONE; +} + +static inline int xeon_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd) +{ + if (ppd & XEON_PPD_SPLIT_BAR_MASK) { + dev_dbg(ndev_dev(ndev), "PPD %d split bar\n", ppd); + return 1; + } + return 0; +} + +static int xeon_init_isr(struct intel_ntb_dev *ndev) +{ + return ndev_init_isr(ndev, XEON_DB_MSIX_VECTOR_COUNT, + XEON_DB_MSIX_VECTOR_COUNT, + XEON_DB_MSIX_VECTOR_SHIFT, + XEON_DB_TOTAL_SHIFT); +} + +static void xeon_deinit_isr(struct intel_ntb_dev *ndev) +{ + ndev_deinit_isr(ndev); +} + +static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, + const struct intel_b2b_addr *addr, + const struct intel_b2b_addr *peer_addr) +{ + struct pci_dev *pdev; + void __iomem *mmio; + resource_size_t bar_size; + phys_addr_t bar_addr; + int b2b_bar; + u8 bar_sz; + + pdev = ndev_pdev(ndev); + mmio = ndev->self_mmio; + + if (ndev->b2b_idx >= ndev->mw_count) { + dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); + b2b_bar = 0; + ndev->b2b_off = 0; + } else { + b2b_bar = ndev_mw_to_bar(ndev, ndev->b2b_idx); + if (b2b_bar < 0) + return -EIO; + + dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar); + + bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar); + + dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size); + + if (b2b_mw_share && XEON_B2B_MIN_SIZE <= bar_size >> 1) { + dev_dbg(ndev_dev(ndev), + "b2b using first half of bar\n"); + ndev->b2b_off = bar_size >> 1; + } else if (XEON_B2B_MIN_SIZE <= bar_size) { + dev_dbg(ndev_dev(ndev), + "b2b using whole bar\n"); + ndev->b2b_off = 0; + --ndev->mw_count; + } else { + dev_dbg(ndev_dev(ndev), + "b2b bar size is too small\n"); + return -EIO; + } + } + + /* Reset the secondary bar sizes to match the primary bar sizes, + * except disable or halve the size of the b2b secondary bar. + * + * Note: code for each specific bar size register, because the register + * offsets are not in a consistent order (bar5sz comes after ppd, odd). + */ + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR23SZ %#x\n", bar_sz); + if (b2b_bar == 2) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, XEON_SBAR23SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR23SZ %#x\n", bar_sz); + + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR45SZ %#x\n", bar_sz); + if (b2b_bar == 4) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, XEON_SBAR45SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR45SZ %#x\n", bar_sz); + } else { + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR4SZ %#x\n", bar_sz); + if (b2b_bar == 4) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, XEON_SBAR4SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR4SZ %#x\n", bar_sz); + + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "PBAR5SZ %#x\n", bar_sz); + if (b2b_bar == 5) { + if (ndev->b2b_off) + bar_sz -= 1; + else + bar_sz = 0; + } + pci_write_config_byte(pdev, XEON_SBAR5SZ_OFFSET, bar_sz); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &bar_sz); + dev_dbg(ndev_dev(ndev), "SBAR5SZ %#x\n", bar_sz); + } + + /* SBAR01 hit by first part of the b2b bar */ + if (b2b_bar == 0) + bar_addr = addr->bar0_addr; + else if (b2b_bar == 2) + bar_addr = addr->bar2_addr64; + else if (b2b_bar == 4 && !ndev->bar4_split) + bar_addr = addr->bar4_addr64; + else if (b2b_bar == 4) + bar_addr = addr->bar4_addr32; + else if (b2b_bar == 5) + bar_addr = addr->bar5_addr32; + else + return -EIO; + + dev_dbg(ndev_dev(ndev), "SBAR01 %#018llx\n", bar_addr); + iowrite64(bar_addr, mmio + XEON_SBAR0BASE_OFFSET); + + /* Other SBAR are normally hit by the PBAR xlat, except for b2b bar. + * The b2b bar is either disabled above, or configured half-size, and + * it starts at the PBAR xlat + offset. + */ + + bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + XEON_SBAR23BASE_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR23BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR23 %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = addr->bar4_addr64 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + XEON_SBAR45BASE_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR45BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR45 %#018llx\n", bar_addr); + } else { + bar_addr = addr->bar4_addr32 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + XEON_SBAR4BASE_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR4BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR4 %#010llx\n", bar_addr); + + bar_addr = addr->bar5_addr32 + + (b2b_bar == 5 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + XEON_SBAR5BASE_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR5BASE_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR5 %#010llx\n", bar_addr); + } + + /* setup incoming bar limits == base addrs (zero length windows) */ + + bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + XEON_SBAR23LMT_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR23LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR23LMT %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = addr->bar4_addr64 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite64(bar_addr, mmio + XEON_SBAR45LMT_OFFSET); + bar_addr = ioread64(mmio + XEON_SBAR45LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR45LMT %#018llx\n", bar_addr); + } else { + bar_addr = addr->bar4_addr32 + + (b2b_bar == 4 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + XEON_SBAR4LMT_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR4LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR4LMT %#010llx\n", bar_addr); + + bar_addr = addr->bar5_addr32 + + (b2b_bar == 5 ? ndev->b2b_off : 0); + iowrite32(bar_addr, mmio + XEON_SBAR5LMT_OFFSET); + bar_addr = ioread32(mmio + XEON_SBAR5LMT_OFFSET); + dev_dbg(ndev_dev(ndev), "SBAR5LMT %#05llx\n", bar_addr); + } + + /* zero incoming translation addrs */ + iowrite64(0, mmio + XEON_SBAR23XLAT_OFFSET); + + if (!ndev->bar4_split) { + iowrite64(0, mmio + XEON_SBAR45XLAT_OFFSET); + } else { + iowrite32(0, mmio + XEON_SBAR4XLAT_OFFSET); + iowrite32(0, mmio + XEON_SBAR5XLAT_OFFSET); + } + + /* zero outgoing translation limits (whole bar size windows) */ + iowrite64(0, mmio + XEON_PBAR23LMT_OFFSET); + if (!ndev->bar4_split) { + iowrite64(0, mmio + XEON_PBAR45LMT_OFFSET); + } else { + iowrite32(0, mmio + XEON_PBAR4LMT_OFFSET); + iowrite32(0, mmio + XEON_PBAR5LMT_OFFSET); + } + + /* set outgoing translation offsets */ + bar_addr = peer_addr->bar2_addr64; + iowrite64(bar_addr, mmio + XEON_PBAR23XLAT_OFFSET); + bar_addr = ioread64(mmio + XEON_PBAR23XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR23XLAT %#018llx\n", bar_addr); + + if (!ndev->bar4_split) { + bar_addr = peer_addr->bar4_addr64; + iowrite64(bar_addr, mmio + XEON_PBAR45XLAT_OFFSET); + bar_addr = ioread64(mmio + XEON_PBAR45XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR45XLAT %#018llx\n", bar_addr); + } else { + bar_addr = peer_addr->bar4_addr32; + iowrite32(bar_addr, mmio + XEON_PBAR4XLAT_OFFSET); + bar_addr = ioread32(mmio + XEON_PBAR4XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR4XLAT %#010llx\n", bar_addr); + + bar_addr = peer_addr->bar5_addr32; + iowrite32(bar_addr, mmio + XEON_PBAR5XLAT_OFFSET); + bar_addr = ioread32(mmio + XEON_PBAR5XLAT_OFFSET); + dev_dbg(ndev_dev(ndev), "PBAR5XLAT %#010llx\n", bar_addr); + } + + /* set the translation offset for b2b registers */ + if (b2b_bar == 0) + bar_addr = peer_addr->bar0_addr; + else if (b2b_bar == 2) + bar_addr = peer_addr->bar2_addr64; + else if (b2b_bar == 4 && !ndev->bar4_split) + bar_addr = peer_addr->bar4_addr64; + else if (b2b_bar == 4) + bar_addr = peer_addr->bar4_addr32; + else if (b2b_bar == 5) + bar_addr = peer_addr->bar5_addr32; + else + return -EIO; + + /* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */ + dev_dbg(ndev_dev(ndev), "B2BXLAT %#018llx\n", bar_addr); + iowrite32(bar_addr, mmio + XEON_B2B_XLAT_OFFSETL); + iowrite32(bar_addr >> 32, mmio + XEON_B2B_XLAT_OFFSETU); + + if (b2b_bar) { + /* map peer ntb mmio config space registers */ + ndev->peer_mmio = pci_iomap(pdev, b2b_bar, + XEON_B2B_MIN_SIZE); + if (!ndev->peer_mmio) + return -EIO; + } + + return 0; +} + +static int xeon_init_ntb(struct intel_ntb_dev *ndev) +{ + int rc; + u32 ntb_ctl; + + if (ndev->bar4_split) + ndev->mw_count = HSX_SPLIT_BAR_MW_COUNT; + else + ndev->mw_count = XEON_MW_COUNT; + + ndev->spad_count = XEON_SPAD_COUNT; + ndev->db_count = XEON_DB_COUNT; + ndev->db_link_mask = XEON_DB_LINK_BIT; + + switch (ndev->ntb.topo) { + case NTB_TOPO_PRI: + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + dev_err(ndev_dev(ndev), "NTB Primary config disabled\n"); + return -EINVAL; + } + + /* enable link to allow secondary side device to appear */ + ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_ctl &= ~NTB_CTL_DISABLE; + iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); + + /* use half the spads for the peer */ + ndev->spad_count >>= 1; + ndev->self_reg = &xeon_pri_reg; + ndev->peer_reg = &xeon_sec_reg; + ndev->xlat_reg = &xeon_sec_xlat; + break; + + case NTB_TOPO_SEC: + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + dev_err(ndev_dev(ndev), "NTB Secondary config disabled\n"); + return -EINVAL; + } + /* use half the spads for the peer */ + ndev->spad_count >>= 1; + ndev->self_reg = &xeon_sec_reg; + ndev->peer_reg = &xeon_pri_reg; + ndev->xlat_reg = &xeon_pri_xlat; + break; + + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + ndev->self_reg = &xeon_pri_reg; + ndev->peer_reg = &xeon_b2b_reg; + ndev->xlat_reg = &xeon_sec_xlat; + + if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) { + ndev->peer_reg = &xeon_pri_reg; + + if (b2b_mw_idx < 0) + ndev->b2b_idx = b2b_mw_idx + ndev->mw_count; + else + ndev->b2b_idx = b2b_mw_idx; + + dev_dbg(ndev_dev(ndev), + "setting up b2b mw idx %d means %d\n", + b2b_mw_idx, ndev->b2b_idx); + + } else if (ndev->hwerr_flags & NTB_HWERR_B2BDOORBELL_BIT14) { + dev_warn(ndev_dev(ndev), "Reduce doorbell count by 1\n"); + ndev->db_count -= 1; + } + + if (ndev->ntb.topo == NTB_TOPO_B2B_USD) { + rc = xeon_setup_b2b_mw(ndev, + &xeon_b2b_dsd_addr, + &xeon_b2b_usd_addr); + } else { + rc = xeon_setup_b2b_mw(ndev, + &xeon_b2b_usd_addr, + &xeon_b2b_dsd_addr); + } + if (rc) + return rc; + + /* Enable Bus Master and Memory Space on the secondary side */ + iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, + ndev->self_mmio + XEON_SPCICMD_OFFSET); + + break; + + default: + return -EINVAL; + } + + ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + + ndev->reg->db_iowrite(ndev->db_valid_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + return 0; +} + +static int xeon_init_dev(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev; + u8 ppd; + int rc, mem; + + pdev = ndev_pdev(ndev); + + switch (pdev->device) { + /* There is a Xeon hardware errata related to writes to SDOORBELL or + * B2BDOORBELL in conjunction with inbound access to NTB MMIO Space, + * which may hang the system. To workaround this use the second memory + * window to access the interrupt and scratch pad registers on the + * remote system. + */ + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; + break; + } + + switch (pdev->device) { + /* There is a hardware errata related to accessing any register in + * SB01BASE in the presence of bidirectional traffic crossing the NTB. + */ + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; + break; + } + + switch (pdev->device) { + /* HW Errata on bit 14 of b2bdoorbell register. Writes will not be + * mirrored to the remote system. Shrink the number of bits by one, + * since bit 14 is the last bit. + */ + case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: + case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: + case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: + case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: + case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: + case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: + case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: + case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: + ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; + break; + } + + ndev->reg = &xeon_reg; + + rc = pci_read_config_byte(pdev, XEON_PPD_OFFSET, &ppd); + if (rc) + return -EIO; + + ndev->ntb.topo = xeon_ppd_topo(ndev, ppd); + dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd, + ntb_topo_string(ndev->ntb.topo)); + if (ndev->ntb.topo == NTB_TOPO_NONE) + return -EINVAL; + + if (ndev->ntb.topo != NTB_TOPO_SEC) { + ndev->bar4_split = xeon_ppd_bar4_split(ndev, ppd); + dev_dbg(ndev_dev(ndev), "ppd %#x bar4_split %d\n", + ppd, ndev->bar4_split); + } else { + /* This is a way for transparent BAR to figure out if we are + * doing split BAR or not. There is no way for the hw on the + * transparent side to know and set the PPD. + */ + mem = pci_select_bars(pdev, IORESOURCE_MEM); + ndev->bar4_split = hweight32(mem) == + HSX_SPLIT_BAR_MW_COUNT + 1; + dev_dbg(ndev_dev(ndev), "mem %#x bar4_split %d\n", + mem, ndev->bar4_split); + } + + rc = xeon_init_ntb(ndev); + if (rc) + return rc; + + return xeon_init_isr(ndev); +} + +static void xeon_deinit_dev(struct intel_ntb_dev *ndev) +{ + xeon_deinit_isr(ndev); +} + +static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) +{ + int rc; + + pci_set_drvdata(pdev, ndev); + + rc = pci_enable_device(pdev); + if (rc) + goto err_pci_enable; + + rc = pci_request_regions(pdev, NTB_NAME); + if (rc) + goto err_pci_regions; + + pci_set_master(pdev); + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) + goto err_dma_mask; + dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n"); + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (rc) + goto err_dma_mask; + dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n"); + } + + ndev->self_mmio = pci_iomap(pdev, 0, 0); + if (!ndev->self_mmio) { + rc = -EIO; + goto err_mmio; + } + ndev->peer_mmio = ndev->self_mmio; + + return 0; + +err_mmio: +err_dma_mask: + pci_clear_master(pdev); + pci_release_regions(pdev); +err_pci_regions: + pci_disable_device(pdev); +err_pci_enable: + pci_set_drvdata(pdev, NULL); + return rc; +} + +static void intel_ntb_deinit_pci(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev = ndev_pdev(ndev); + + if (ndev->peer_mmio && ndev->peer_mmio != ndev->self_mmio) + pci_iounmap(pdev, ndev->peer_mmio); + pci_iounmap(pdev, ndev->self_mmio); + + pci_clear_master(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static inline void ndev_init_struct(struct intel_ntb_dev *ndev, + struct pci_dev *pdev) +{ + ndev->ntb.pdev = pdev; + ndev->ntb.topo = NTB_TOPO_NONE; + ndev->ntb.ops = &intel_ntb_ops; + + ndev->b2b_off = 0; + ndev->b2b_idx = INT_MAX; + + ndev->bar4_split = 0; + + ndev->mw_count = 0; + ndev->spad_count = 0; + ndev->db_count = 0; + ndev->db_vec_count = 0; + ndev->db_vec_shift = 0; + + ndev->ntb_ctl = 0; + ndev->lnk_sta = 0; + + ndev->db_valid_mask = 0; + ndev->db_link_mask = 0; + ndev->db_mask = 0; + + spin_lock_init(&ndev->db_mask_lock); +} + +static int intel_ntb_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct intel_ntb_dev *ndev; + int rc, node; + + node = dev_to_node(&pdev->dev); + + if (pdev_is_atom(pdev)) { + ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); + if (!ndev) { + rc = -ENOMEM; + goto err_ndev; + } + + ndev_init_struct(ndev, pdev); + + rc = intel_ntb_init_pci(ndev, pdev); + if (rc) + goto err_init_pci; + + rc = atom_init_dev(ndev); + if (rc) + goto err_init_dev; + + } else if (pdev_is_xeon(pdev)) { + ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); + if (!ndev) { + rc = -ENOMEM; + goto err_ndev; + } + + ndev_init_struct(ndev, pdev); + + rc = intel_ntb_init_pci(ndev, pdev); + if (rc) + goto err_init_pci; + + rc = xeon_init_dev(ndev); + if (rc) + goto err_init_dev; + + } else { + rc = -EINVAL; + goto err_ndev; + } + + ndev_reset_unsafe_flags(ndev); + + ndev->reg->poll_link(ndev); + + ndev_init_debugfs(ndev); + + rc = ntb_register_device(&ndev->ntb); + if (rc) + goto err_register; + + dev_info(&pdev->dev, "NTB device registered.\n"); + + return 0; + +err_register: + ndev_deinit_debugfs(ndev); + if (pdev_is_atom(pdev)) + atom_deinit_dev(ndev); + else if (pdev_is_xeon(pdev)) + xeon_deinit_dev(ndev); +err_init_dev: + intel_ntb_deinit_pci(ndev); +err_init_pci: + kfree(ndev); +err_ndev: + return rc; +} + +static void intel_ntb_pci_remove(struct pci_dev *pdev) +{ + struct intel_ntb_dev *ndev = pci_get_drvdata(pdev); + + ntb_unregister_device(&ndev->ntb); + ndev_deinit_debugfs(ndev); + if (pdev_is_atom(pdev)) + atom_deinit_dev(ndev); + else if (pdev_is_xeon(pdev)) + xeon_deinit_dev(ndev); + intel_ntb_deinit_pci(ndev); + kfree(ndev); +} + +static const struct intel_ntb_reg atom_reg = { + .poll_link = atom_poll_link, + .link_is_up = atom_link_is_up, + .db_ioread = atom_db_ioread, + .db_iowrite = atom_db_iowrite, + .db_size = sizeof(u64), + .ntb_ctl = ATOM_NTBCNTL_OFFSET, + .mw_bar = {2, 4}, +}; + +static const struct intel_ntb_alt_reg atom_pri_reg = { + .db_bell = ATOM_PDOORBELL_OFFSET, + .db_mask = ATOM_PDBMSK_OFFSET, + .spad = ATOM_SPAD_OFFSET, +}; + +static const struct intel_ntb_alt_reg atom_b2b_reg = { + .db_bell = ATOM_B2B_DOORBELL_OFFSET, + .spad = ATOM_B2B_SPAD_OFFSET, +}; + +static const struct intel_ntb_xlat_reg atom_sec_xlat = { + /* FIXME : .bar0_base = ATOM_SBAR0BASE_OFFSET, */ + /* FIXME : .bar2_limit = ATOM_SBAR2LMT_OFFSET, */ + .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, +}; + +static const struct intel_ntb_reg xeon_reg = { + .poll_link = xeon_poll_link, + .link_is_up = xeon_link_is_up, + .db_ioread = xeon_db_ioread, + .db_iowrite = xeon_db_iowrite, + .db_size = sizeof(u32), + .ntb_ctl = XEON_NTBCNTL_OFFSET, + .mw_bar = {2, 4, 5}, +}; + +static const struct intel_ntb_alt_reg xeon_pri_reg = { + .db_bell = XEON_PDOORBELL_OFFSET, + .db_mask = XEON_PDBMSK_OFFSET, + .spad = XEON_SPAD_OFFSET, +}; + +static const struct intel_ntb_alt_reg xeon_sec_reg = { + .db_bell = XEON_SDOORBELL_OFFSET, + .db_mask = XEON_SDBMSK_OFFSET, + /* second half of the scratchpads */ + .spad = XEON_SPAD_OFFSET + (XEON_SPAD_COUNT << 1), +}; + +static const struct intel_ntb_alt_reg xeon_b2b_reg = { + .db_bell = XEON_B2B_DOORBELL_OFFSET, + .spad = XEON_B2B_SPAD_OFFSET, +}; + +static const struct intel_ntb_xlat_reg xeon_pri_xlat = { + /* Note: no primary .bar0_base visible to the secondary side. + * + * The secondary side cannot get the base address stored in primary + * bars. The base address is necessary to set the limit register to + * any value other than zero, or unlimited. + * + * WITHOUT THE BASE ADDRESS, THE SECONDARY SIDE CANNOT DISABLE the + * window by setting the limit equal to base, nor can it limit the size + * of the memory window by setting the limit to base + size. + */ + .bar2_limit = XEON_PBAR23LMT_OFFSET, + .bar2_xlat = XEON_PBAR23XLAT_OFFSET, +}; + +static const struct intel_ntb_xlat_reg xeon_sec_xlat = { + .bar0_base = XEON_SBAR0BASE_OFFSET, + .bar2_limit = XEON_SBAR23LMT_OFFSET, + .bar2_xlat = XEON_SBAR23XLAT_OFFSET, +}; + +static struct intel_b2b_addr xeon_b2b_usd_addr = { + .bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64, + .bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64, + .bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32, + .bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32, +}; + +static struct intel_b2b_addr xeon_b2b_dsd_addr = { + .bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64, + .bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64, + .bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32, + .bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32, +}; + +/* operations for primary side of local ntb */ +static const struct ntb_dev_ops intel_ntb_ops = { + .mw_count = intel_ntb_mw_count, + .mw_get_range = intel_ntb_mw_get_range, + .mw_set_trans = intel_ntb_mw_set_trans, + .link_is_up = intel_ntb_link_is_up, + .link_enable = intel_ntb_link_enable, + .link_disable = intel_ntb_link_disable, + .db_is_unsafe = intel_ntb_db_is_unsafe, + .db_valid_mask = intel_ntb_db_valid_mask, + .db_vector_count = intel_ntb_db_vector_count, + .db_vector_mask = intel_ntb_db_vector_mask, + .db_read = intel_ntb_db_read, + .db_clear = intel_ntb_db_clear, + .db_set_mask = intel_ntb_db_set_mask, + .db_clear_mask = intel_ntb_db_clear_mask, + .peer_db_addr = intel_ntb_peer_db_addr, + .peer_db_set = intel_ntb_peer_db_set, + .spad_is_unsafe = intel_ntb_spad_is_unsafe, + .spad_count = intel_ntb_spad_count, + .spad_read = intel_ntb_spad_read, + .spad_write = intel_ntb_spad_write, + .peer_spad_addr = intel_ntb_peer_spad_addr, + .peer_spad_read = intel_ntb_peer_spad_read, + .peer_spad_write = intel_ntb_peer_spad_write, +}; + +static const struct file_operations intel_ntb_debugfs_info = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ndev_debugfs_read, +}; + +static const struct pci_device_id intel_ntb_pci_tbl[] = { + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, + {0} +}; +MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); + +static struct pci_driver intel_ntb_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = intel_ntb_pci_tbl, + .probe = intel_ntb_pci_probe, + .remove = intel_ntb_pci_remove, +}; + +static int __init intel_ntb_pci_driver_init(void) +{ + pr_info("%s %s\n", NTB_DESC, NTB_VER); + + if (debugfs_initialized()) + debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + return pci_register_driver(&intel_ntb_pci_driver); +} +module_init(intel_ntb_pci_driver_init); + +static void __exit intel_ntb_pci_driver_exit(void) +{ + pci_unregister_driver(&intel_ntb_pci_driver); + + debugfs_remove_recursive(debugfs_dir); +} +module_exit(intel_ntb_pci_driver_exit); + diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h new file mode 100644 index 000000000000..7ddaf387b679 --- /dev/null +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -0,0 +1,342 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * BSD LICENSE + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel PCIe NTB Linux driver + * + * Contact Information: + * Jon Mason <jon.mason@intel.com> + */ + +#ifndef NTB_HW_INTEL_H +#define NTB_HW_INTEL_H + +#include <linux/ntb.h> +#include <linux/pci.h> + +#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 +#define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 +#define PCI_DEVICE_ID_INTEL_NTB_SS_JSF 0x3727 +#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB 0x3C0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_SNB 0x3C0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_SNB 0x3C0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_IVT 0x0E0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_IVT 0x0E0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_IVT 0x0E0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX 0x2F0D +#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E +#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F +#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E + +/* Intel Xeon hardware */ + +#define XEON_PBAR23LMT_OFFSET 0x0000 +#define XEON_PBAR45LMT_OFFSET 0x0008 +#define XEON_PBAR4LMT_OFFSET 0x0008 +#define XEON_PBAR5LMT_OFFSET 0x000c +#define XEON_PBAR23XLAT_OFFSET 0x0010 +#define XEON_PBAR45XLAT_OFFSET 0x0018 +#define XEON_PBAR4XLAT_OFFSET 0x0018 +#define XEON_PBAR5XLAT_OFFSET 0x001c +#define XEON_SBAR23LMT_OFFSET 0x0020 +#define XEON_SBAR45LMT_OFFSET 0x0028 +#define XEON_SBAR4LMT_OFFSET 0x0028 +#define XEON_SBAR5LMT_OFFSET 0x002c +#define XEON_SBAR23XLAT_OFFSET 0x0030 +#define XEON_SBAR45XLAT_OFFSET 0x0038 +#define XEON_SBAR4XLAT_OFFSET 0x0038 +#define XEON_SBAR5XLAT_OFFSET 0x003c +#define XEON_SBAR0BASE_OFFSET 0x0040 +#define XEON_SBAR23BASE_OFFSET 0x0048 +#define XEON_SBAR45BASE_OFFSET 0x0050 +#define XEON_SBAR4BASE_OFFSET 0x0050 +#define XEON_SBAR5BASE_OFFSET 0x0054 +#define XEON_SBDF_OFFSET 0x005c +#define XEON_NTBCNTL_OFFSET 0x0058 +#define XEON_PDOORBELL_OFFSET 0x0060 +#define XEON_PDBMSK_OFFSET 0x0062 +#define XEON_SDOORBELL_OFFSET 0x0064 +#define XEON_SDBMSK_OFFSET 0x0066 +#define XEON_USMEMMISS_OFFSET 0x0070 +#define XEON_SPAD_OFFSET 0x0080 +#define XEON_PBAR23SZ_OFFSET 0x00d0 +#define XEON_PBAR45SZ_OFFSET 0x00d1 +#define XEON_PBAR4SZ_OFFSET 0x00d1 +#define XEON_SBAR23SZ_OFFSET 0x00d2 +#define XEON_SBAR45SZ_OFFSET 0x00d3 +#define XEON_SBAR4SZ_OFFSET 0x00d3 +#define XEON_PPD_OFFSET 0x00d4 +#define XEON_PBAR5SZ_OFFSET 0x00d5 +#define XEON_SBAR5SZ_OFFSET 0x00d6 +#define XEON_WCCNTRL_OFFSET 0x00e0 +#define XEON_UNCERRSTS_OFFSET 0x014c +#define XEON_CORERRSTS_OFFSET 0x0158 +#define XEON_LINK_STATUS_OFFSET 0x01a2 +#define XEON_SPCICMD_OFFSET 0x0504 +#define XEON_DEVCTRL_OFFSET 0x0598 +#define XEON_DEVSTS_OFFSET 0x059a +#define XEON_SLINK_STATUS_OFFSET 0x05a2 +#define XEON_B2B_SPAD_OFFSET 0x0100 +#define XEON_B2B_DOORBELL_OFFSET 0x0140 +#define XEON_B2B_XLAT_OFFSETL 0x0144 +#define XEON_B2B_XLAT_OFFSETU 0x0148 +#define XEON_PPD_CONN_MASK 0x03 +#define XEON_PPD_CONN_TRANSPARENT 0x00 +#define XEON_PPD_CONN_B2B 0x01 +#define XEON_PPD_CONN_RP 0x02 +#define XEON_PPD_DEV_MASK 0x10 +#define XEON_PPD_DEV_USD 0x00 +#define XEON_PPD_DEV_DSD 0x10 +#define XEON_PPD_SPLIT_BAR_MASK 0x40 + +#define XEON_PPD_TOPO_MASK (XEON_PPD_CONN_MASK | XEON_PPD_DEV_MASK) +#define XEON_PPD_TOPO_PRI_USD (XEON_PPD_CONN_RP | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_PRI_DSD (XEON_PPD_CONN_RP | XEON_PPD_DEV_DSD) +#define XEON_PPD_TOPO_SEC_USD (XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_SEC_DSD (XEON_PPD_CONN_TRANSPARENT | XEON_PPD_DEV_DSD) +#define XEON_PPD_TOPO_B2B_USD (XEON_PPD_CONN_B2B | XEON_PPD_DEV_USD) +#define XEON_PPD_TOPO_B2B_DSD (XEON_PPD_CONN_B2B | XEON_PPD_DEV_DSD) + +#define XEON_MW_COUNT 2 +#define HSX_SPLIT_BAR_MW_COUNT 3 +#define XEON_DB_COUNT 15 +#define XEON_DB_LINK 15 +#define XEON_DB_LINK_BIT BIT_ULL(XEON_DB_LINK) +#define XEON_DB_MSIX_VECTOR_COUNT 4 +#define XEON_DB_MSIX_VECTOR_SHIFT 5 +#define XEON_DB_TOTAL_SHIFT 16 +#define XEON_SPAD_COUNT 16 + +/* Intel Atom hardware */ + +#define ATOM_SBAR2XLAT_OFFSET 0x0008 +#define ATOM_PDOORBELL_OFFSET 0x0020 +#define ATOM_PDBMSK_OFFSET 0x0028 +#define ATOM_NTBCNTL_OFFSET 0x0060 +#define ATOM_SPAD_OFFSET 0x0080 +#define ATOM_PPD_OFFSET 0x00d4 +#define ATOM_PBAR2XLAT_OFFSET 0x8008 +#define ATOM_B2B_DOORBELL_OFFSET 0x8020 +#define ATOM_B2B_SPAD_OFFSET 0x8080 +#define ATOM_SPCICMD_OFFSET 0xb004 +#define ATOM_LINK_STATUS_OFFSET 0xb052 +#define ATOM_ERRCORSTS_OFFSET 0xb110 +#define ATOM_IP_BASE 0xc000 +#define ATOM_DESKEWSTS_OFFSET (ATOM_IP_BASE + 0x3024) +#define ATOM_LTSSMERRSTS0_OFFSET (ATOM_IP_BASE + 0x3180) +#define ATOM_LTSSMSTATEJMP_OFFSET (ATOM_IP_BASE + 0x3040) +#define ATOM_IBSTERRRCRVSTS0_OFFSET (ATOM_IP_BASE + 0x3324) +#define ATOM_MODPHY_PCSREG4 0x1c004 +#define ATOM_MODPHY_PCSREG6 0x1c006 + +#define ATOM_PPD_INIT_LINK 0x0008 +#define ATOM_PPD_CONN_MASK 0x0300 +#define ATOM_PPD_CONN_TRANSPARENT 0x0000 +#define ATOM_PPD_CONN_B2B 0x0100 +#define ATOM_PPD_CONN_RP 0x0200 +#define ATOM_PPD_DEV_MASK 0x1000 +#define ATOM_PPD_DEV_USD 0x0000 +#define ATOM_PPD_DEV_DSD 0x1000 +#define ATOM_PPD_TOPO_MASK (ATOM_PPD_CONN_MASK | ATOM_PPD_DEV_MASK) +#define ATOM_PPD_TOPO_PRI_USD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_PRI_DSD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_DSD) +#define ATOM_PPD_TOPO_SEC_USD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_SEC_DSD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_DSD) +#define ATOM_PPD_TOPO_B2B_USD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_USD) +#define ATOM_PPD_TOPO_B2B_DSD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_DSD) + +#define ATOM_MW_COUNT 2 +#define ATOM_DB_COUNT 34 +#define ATOM_DB_VALID_MASK (BIT_ULL(ATOM_DB_COUNT) - 1) +#define ATOM_DB_MSIX_VECTOR_COUNT 34 +#define ATOM_DB_MSIX_VECTOR_SHIFT 1 +#define ATOM_DB_TOTAL_SHIFT 34 +#define ATOM_SPAD_COUNT 16 + +#define ATOM_NTB_CTL_DOWN_BIT BIT(16) +#define ATOM_NTB_CTL_ACTIVE(x) !(x & ATOM_NTB_CTL_DOWN_BIT) + +#define ATOM_DESKEWSTS_DBERR BIT(15) +#define ATOM_LTSSMERRSTS0_UNEXPECTEDEI BIT(20) +#define ATOM_LTSSMSTATEJMP_FORCEDETECT BIT(2) +#define ATOM_IBIST_ERR_OFLOW 0x7FFF7FFF + +#define ATOM_LINK_HB_TIMEOUT msecs_to_jiffies(1000) +#define ATOM_LINK_RECOVERY_TIME msecs_to_jiffies(500) + +/* Ntb control and link status */ + +#define NTB_CTL_CFG_LOCK BIT(0) +#define NTB_CTL_DISABLE BIT(1) +#define NTB_CTL_S2P_BAR2_SNOOP BIT(2) +#define NTB_CTL_P2S_BAR2_SNOOP BIT(4) +#define NTB_CTL_S2P_BAR4_SNOOP BIT(6) +#define NTB_CTL_P2S_BAR4_SNOOP BIT(8) +#define NTB_CTL_S2P_BAR5_SNOOP BIT(12) +#define NTB_CTL_P2S_BAR5_SNOOP BIT(14) + +#define NTB_LNK_STA_ACTIVE_BIT 0x2000 +#define NTB_LNK_STA_SPEED_MASK 0x000f +#define NTB_LNK_STA_WIDTH_MASK 0x03f0 +#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LNK_STA_ACTIVE_BIT)) +#define NTB_LNK_STA_SPEED(x) ((x) & NTB_LNK_STA_SPEED_MASK) +#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 4) + +/* Use the following addresses for translation between b2b ntb devices in case + * the hardware default values are not reliable. */ +#define XEON_B2B_BAR0_USD_ADDR 0x1000000000000000ull +#define XEON_B2B_BAR2_USD_ADDR64 0x2000000000000000ull +#define XEON_B2B_BAR4_USD_ADDR64 0x4000000000000000ull +#define XEON_B2B_BAR4_USD_ADDR32 0x20000000u +#define XEON_B2B_BAR5_USD_ADDR32 0x40000000u +#define XEON_B2B_BAR0_DSD_ADDR 0x9000000000000000ull +#define XEON_B2B_BAR2_DSD_ADDR64 0xa000000000000000ull +#define XEON_B2B_BAR4_DSD_ADDR64 0xc000000000000000ull +#define XEON_B2B_BAR4_DSD_ADDR32 0xa0000000u +#define XEON_B2B_BAR5_DSD_ADDR32 0xc0000000u + +/* The peer ntb secondary config space is 32KB fixed size */ +#define XEON_B2B_MIN_SIZE 0x8000 + +/* flags to indicate hardware errata */ +#define NTB_HWERR_SDOORBELL_LOCKUP BIT_ULL(0) +#define NTB_HWERR_SB01BASE_LOCKUP BIT_ULL(1) +#define NTB_HWERR_B2BDOORBELL_BIT14 BIT_ULL(2) + +/* flags to indicate unsafe api */ +#define NTB_UNSAFE_DB BIT_ULL(0) +#define NTB_UNSAFE_SPAD BIT_ULL(1) + +struct intel_ntb_dev; + +struct intel_ntb_reg { + int (*poll_link)(struct intel_ntb_dev *ndev); + int (*link_is_up)(struct intel_ntb_dev *ndev); + u64 (*db_ioread)(void __iomem *mmio); + void (*db_iowrite)(u64 db_bits, void __iomem *mmio); + unsigned long ntb_ctl; + resource_size_t db_size; + int mw_bar[]; +}; + +struct intel_ntb_alt_reg { + unsigned long db_bell; + unsigned long db_mask; + unsigned long spad; +}; + +struct intel_ntb_xlat_reg { + unsigned long bar0_base; + unsigned long bar2_xlat; + unsigned long bar2_limit; +}; + +struct intel_b2b_addr { + phys_addr_t bar0_addr; + phys_addr_t bar2_addr64; + phys_addr_t bar4_addr64; + phys_addr_t bar4_addr32; + phys_addr_t bar5_addr32; +}; + +struct intel_ntb_vec { + struct intel_ntb_dev *ndev; + int num; +}; + +struct intel_ntb_dev { + struct ntb_dev ntb; + + /* offset of peer bar0 in b2b bar */ + unsigned long b2b_off; + /* mw idx used to access peer bar0 */ + unsigned int b2b_idx; + + /* BAR45 is split into BAR4 and BAR5 */ + bool bar4_split; + + u32 ntb_ctl; + u32 lnk_sta; + + unsigned char mw_count; + unsigned char spad_count; + unsigned char db_count; + unsigned char db_vec_count; + unsigned char db_vec_shift; + + u64 db_valid_mask; + u64 db_link_mask; + u64 db_mask; + + /* synchronize rmw access of db_mask and hw reg */ + spinlock_t db_mask_lock; + + struct msix_entry *msix; + struct intel_ntb_vec *vec; + + const struct intel_ntb_reg *reg; + const struct intel_ntb_alt_reg *self_reg; + const struct intel_ntb_alt_reg *peer_reg; + const struct intel_ntb_xlat_reg *xlat_reg; + void __iomem *self_mmio; + void __iomem *peer_mmio; + phys_addr_t peer_addr; + + unsigned long last_ts; + struct delayed_work hb_timer; + + unsigned long hwerr_flags; + unsigned long unsafe_flags; + unsigned long unsafe_flags_ignore; + + struct dentry *debugfs_dir; + struct dentry *debugfs_info; +}; + +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) +#define ndev_name(ndev) pci_name(ndev_pdev(ndev)) +#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev) +#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb) +#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work) + +#endif diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c new file mode 100644 index 000000000000..23435f2a5486 --- /dev/null +++ b/drivers/ntb/ntb.c @@ -0,0 +1,251 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Linux driver + * + * Contact Information: + * Allen Hubbe <Allen.Hubbe@emc.com> + */ + +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <linux/ntb.h> +#include <linux/pci.h> + +#define DRIVER_NAME "ntb" +#define DRIVER_DESCRIPTION "PCIe NTB Driver Framework" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "24 March 2015" +#define DRIVER_AUTHOR "Allen Hubbe <Allen.Hubbe@emc.com>" + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static struct bus_type ntb_bus; +static void ntb_dev_release(struct device *dev); + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name) +{ + if (!client) + return -EINVAL; + if (!ntb_client_ops_is_valid(&client->ops)) + return -EINVAL; + + memset(&client->drv, 0, sizeof(client->drv)); + client->drv.bus = &ntb_bus; + client->drv.name = mod_name; + client->drv.owner = mod; + + return driver_register(&client->drv); +} +EXPORT_SYMBOL(__ntb_register_client); + +void ntb_unregister_client(struct ntb_client *client) +{ + driver_unregister(&client->drv); +} +EXPORT_SYMBOL(ntb_unregister_client); + +int ntb_register_device(struct ntb_dev *ntb) +{ + if (!ntb) + return -EINVAL; + if (!ntb->pdev) + return -EINVAL; + if (!ntb->ops) + return -EINVAL; + if (!ntb_dev_ops_is_valid(ntb->ops)) + return -EINVAL; + + init_completion(&ntb->released); + + memset(&ntb->dev, 0, sizeof(ntb->dev)); + ntb->dev.bus = &ntb_bus; + ntb->dev.parent = &ntb->pdev->dev; + ntb->dev.release = ntb_dev_release; + dev_set_name(&ntb->dev, pci_name(ntb->pdev)); + + ntb->ctx = NULL; + ntb->ctx_ops = NULL; + spin_lock_init(&ntb->ctx_lock); + + return device_register(&ntb->dev); +} +EXPORT_SYMBOL(ntb_register_device); + +void ntb_unregister_device(struct ntb_dev *ntb) +{ + device_unregister(&ntb->dev); + wait_for_completion(&ntb->released); +} +EXPORT_SYMBOL(ntb_unregister_device); + +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops) +{ + unsigned long irqflags; + + if (!ntb_ctx_ops_is_valid(ctx_ops)) + return -EINVAL; + if (ntb->ctx_ops) + return -EINVAL; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + ntb->ctx = ctx; + ntb->ctx_ops = ctx_ops; + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); + + return 0; +} +EXPORT_SYMBOL(ntb_set_ctx); + +void ntb_clear_ctx(struct ntb_dev *ntb) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + ntb->ctx_ops = NULL; + ntb->ctx = NULL; + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_clear_ctx); + +void ntb_link_event(struct ntb_dev *ntb) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + if (ntb->ctx_ops && ntb->ctx_ops->link_event) + ntb->ctx_ops->link_event(ntb->ctx); + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_link_event); + +void ntb_db_event(struct ntb_dev *ntb, int vector) +{ + unsigned long irqflags; + + spin_lock_irqsave(&ntb->ctx_lock, irqflags); + { + if (ntb->ctx_ops && ntb->ctx_ops->db_event) + ntb->ctx_ops->db_event(ntb->ctx, vector); + } + spin_unlock_irqrestore(&ntb->ctx_lock, irqflags); +} +EXPORT_SYMBOL(ntb_db_event); + +static int ntb_probe(struct device *dev) +{ + struct ntb_dev *ntb; + struct ntb_client *client; + int rc; + + get_device(dev); + ntb = dev_ntb(dev); + client = drv_ntb_client(dev->driver); + + rc = client->ops.probe(client, ntb); + if (rc) + put_device(dev); + + return rc; +} + +static int ntb_remove(struct device *dev) +{ + struct ntb_dev *ntb; + struct ntb_client *client; + + if (dev->driver) { + ntb = dev_ntb(dev); + client = drv_ntb_client(dev->driver); + + client->ops.remove(client, ntb); + put_device(dev); + } + + return 0; +} + +static void ntb_dev_release(struct device *dev) +{ + struct ntb_dev *ntb = dev_ntb(dev); + + complete(&ntb->released); +} + +static struct bus_type ntb_bus = { + .name = "ntb", + .probe = ntb_probe, + .remove = ntb_remove, +}; + +static int __init ntb_driver_init(void) +{ + return bus_register(&ntb_bus); +} +module_init(ntb_driver_init); + +static void __exit ntb_driver_exit(void) +{ + bus_unregister(&ntb_bus); +} +module_exit(ntb_driver_exit); + diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c deleted file mode 100644 index 3f6738612f45..000000000000 --- a/drivers/ntb/ntb_hw.c +++ /dev/null @@ -1,1895 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason <jon.mason@intel.com> - */ -#include <linux/debugfs.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/random.h> -#include <linux/slab.h> -#include "ntb_hw.h" -#include "ntb_regs.h" - -#define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver" -#define NTB_VER "1.0" - -MODULE_DESCRIPTION(NTB_NAME); -MODULE_VERSION(NTB_VER); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); - -enum { - NTB_CONN_TRANSPARENT = 0, - NTB_CONN_B2B, - NTB_CONN_RP, -}; - -enum { - NTB_DEV_USD = 0, - NTB_DEV_DSD, -}; - -enum { - SNB_HW = 0, - BWD_HW, -}; - -static struct dentry *debugfs_dir; - -#define BWD_LINK_RECOVERY_TIME 500 - -/* Translate memory window 0,1,2 to BAR 2,4,5 */ -#define MW_TO_BAR(mw) (mw == 0 ? 2 : (mw == 1 ? 4 : 5)) - -static const struct pci_device_id ntb_pci_tbl[] = { - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, - {0} -}; -MODULE_DEVICE_TABLE(pci, ntb_pci_tbl); - -static int is_ntb_xeon(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: - case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: - case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: - case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: - return 1; - default: - return 0; - } - - return 0; -} - -static int is_ntb_atom(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: - return 1; - default: - return 0; - } - - return 0; -} - -static void ntb_set_errata_flags(struct ntb_device *ndev) -{ - switch (ndev->pdev->device) { - /* - * this workaround applies to all platform up to IvyBridge - * Haswell has splitbar support and use a different workaround - */ - case PCI_DEVICE_ID_INTEL_NTB_SS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: - case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: - case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: - case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: - case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: - case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: - case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: - case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: - ndev->wa_flags |= WA_SNB_ERR; - break; - } -} - -/** - * ntb_register_event_callback() - register event callback - * @ndev: pointer to ntb_device instance - * @func: callback function to register - * - * This function registers a callback for any HW driver events such as link - * up/down, power management notices and etc. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_register_event_callback(struct ntb_device *ndev, - void (*func)(void *handle, - enum ntb_hw_event event)) -{ - if (ndev->event_cb) - return -EINVAL; - - ndev->event_cb = func; - - return 0; -} - -/** - * ntb_unregister_event_callback() - unregisters the event callback - * @ndev: pointer to ntb_device instance - * - * This function unregisters the existing callback from transport - */ -void ntb_unregister_event_callback(struct ntb_device *ndev) -{ - ndev->event_cb = NULL; -} - -static void ntb_irq_work(unsigned long data) -{ - struct ntb_db_cb *db_cb = (struct ntb_db_cb *)data; - int rc; - - rc = db_cb->callback(db_cb->data, db_cb->db_num); - if (rc) - tasklet_schedule(&db_cb->irq_work); - else { - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - mask = readw(ndev->reg_ofs.ldb_mask); - clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - } -} - -/** - * ntb_register_db_callback() - register a callback for doorbell interrupt - * @ndev: pointer to ntb_device instance - * @idx: doorbell index to register callback, zero based - * @data: pointer to be returned to caller with every callback - * @func: callback function to register - * - * This function registers a callback function for the doorbell interrupt - * on the primary side. The function will unmask the doorbell as well to - * allow interrupt. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx, - void *data, int (*func)(void *data, int db_num)) -{ - unsigned long mask; - - if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) { - dev_warn(&ndev->pdev->dev, "Invalid Index.\n"); - return -EINVAL; - } - - ndev->db_cb[idx].callback = func; - ndev->db_cb[idx].data = data; - ndev->db_cb[idx].ndev = ndev; - - tasklet_init(&ndev->db_cb[idx].irq_work, ntb_irq_work, - (unsigned long) &ndev->db_cb[idx]); - - /* unmask interrupt */ - mask = readw(ndev->reg_ofs.ldb_mask); - clear_bit(idx * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - return 0; -} - -/** - * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt - * @ndev: pointer to ntb_device instance - * @idx: doorbell index to register callback, zero based - * - * This function unregisters a callback function for the doorbell interrupt - * on the primary side. The function will also mask the said doorbell. - */ -void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx) -{ - unsigned long mask; - - if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback) - return; - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(idx * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_disable(&ndev->db_cb[idx].irq_work); - - ndev->db_cb[idx].callback = NULL; -} - -/** - * ntb_find_transport() - find the transport pointer - * @transport: pointer to pci device - * - * Given the pci device pointer, return the transport pointer passed in when - * the transport attached when it was inited. - * - * RETURNS: pointer to transport. - */ -void *ntb_find_transport(struct pci_dev *pdev) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - return ndev->ntb_transport; -} - -/** - * ntb_register_transport() - Register NTB transport with NTB HW driver - * @transport: transport identifier - * - * This function allows a transport to reserve the hardware driver for - * NTB usage. - * - * RETURNS: pointer to ntb_device, NULL on error. - */ -struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - - if (ndev->ntb_transport) - return NULL; - - ndev->ntb_transport = transport; - return ndev; -} - -/** - * ntb_unregister_transport() - Unregister the transport with the NTB HW driver - * @ndev - ntb_device of the transport to be freed - * - * This function unregisters the transport from the HW driver and performs any - * necessary cleanups. - */ -void ntb_unregister_transport(struct ntb_device *ndev) -{ - int i; - - if (!ndev->ntb_transport) - return; - - for (i = 0; i < ndev->max_cbs; i++) - ntb_unregister_db_callback(ndev, i); - - ntb_unregister_event_callback(ndev); - ndev->ntb_transport = NULL; -} - -/** - * ntb_write_local_spad() - write to the secondary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. This writes over the data mirrored to the local scratchpad register - * by the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n", - val, idx); - writel(val, ndev->reg_ofs.spad_read + idx * 4); - - return 0; -} - -/** - * ntb_read_local_spad() - read from the primary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. This allows the local system to read data - * written and mirrored to the scratchpad register by the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - *val = readl(ndev->reg_ofs.spad_write + idx * 4); - dev_dbg(&ndev->pdev->dev, - "Reading %x from local scratch pad index %d\n", *val, idx); - - return 0; -} - -/** - * ntb_write_remote_spad() - write to the secondary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to the scratchpad register, 0 based - * @val: the data value to put into the register - * - * This function allows writing of a 32bit value to the indexed scratchpad - * register. The register resides on the secondary (external) side. This allows - * the local system to write data to be mirrored to the remote systems - * scratchpad register. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n", - val, idx); - writel(val, ndev->reg_ofs.spad_write + idx * 4); - - return 0; -} - -/** - * ntb_read_remote_spad() - read from the primary scratchpad register - * @ndev: pointer to ntb_device instance - * @idx: index to scratchpad register, 0 based - * @val: pointer to 32bit integer for storing the register value - * - * This function allows reading of the 32bit scratchpad register on - * the primary (internal) side. This alloows the local system to read the data - * it wrote to be mirrored on the remote system. - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val) -{ - if (idx >= ndev->limits.max_spads) - return -EINVAL; - - *val = readl(ndev->reg_ofs.spad_read + idx * 4); - dev_dbg(&ndev->pdev->dev, - "Reading %x from remote scratch pad index %d\n", *val, idx); - - return 0; -} - -/** - * ntb_get_mw_base() - get addr for the NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the base address of the memory window specified. - * - * RETURNS: address, or NULL on error. - */ -resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return 0; - - return pci_resource_start(ndev->pdev, MW_TO_BAR(mw)); -} - -/** - * ntb_get_mw_vbase() - get virtual addr for the NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the base virtual address of the memory window - * specified. - * - * RETURNS: pointer to virtual address, or NULL on error. - */ -void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return NULL; - - return ndev->mw[mw].vbase; -} - -/** - * ntb_get_mw_size() - return size of NTB memory window - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * - * This function provides the physical size of the memory window specified - * - * RETURNS: the size of the memory window or zero on error - */ -u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw) -{ - if (mw >= ntb_max_mw(ndev)) - return 0; - - return ndev->mw[mw].bar_sz; -} - -/** - * ntb_set_mw_addr - set the memory window address - * @ndev: pointer to ntb_device instance - * @mw: memory window number - * @addr: base address for data - * - * This function sets the base physical address of the memory window. This - * memory address is where data from the remote system will be transfered into - * or out of depending on how the transport is configured. - */ -void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr) -{ - if (mw >= ntb_max_mw(ndev)) - return; - - dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr, - MW_TO_BAR(mw)); - - ndev->mw[mw].phys_addr = addr; - - switch (MW_TO_BAR(mw)) { - case NTB_BAR_23: - writeq(addr, ndev->reg_ofs.bar2_xlat); - break; - case NTB_BAR_4: - if (ndev->split_bar) - writel(addr, ndev->reg_ofs.bar4_xlat); - else - writeq(addr, ndev->reg_ofs.bar4_xlat); - break; - case NTB_BAR_5: - writel(addr, ndev->reg_ofs.bar5_xlat); - break; - } -} - -/** - * ntb_ring_doorbell() - Set the doorbell on the secondary/external side - * @ndev: pointer to ntb_device instance - * @db: doorbell to ring - * - * This function allows triggering of a doorbell on the secondary/external - * side that will initiate an interrupt on the remote host - * - * RETURNS: An appropriate -ERRNO error value on error, or zero for success. - */ -void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db) -{ - dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db); - - if (ndev->hw_type == BWD_HW) - writeq((u64) 1 << db, ndev->reg_ofs.rdb); - else - writew(((1 << ndev->bits_per_vector) - 1) << - (db * ndev->bits_per_vector), ndev->reg_ofs.rdb); -} - -static void bwd_recover_link(struct ntb_device *ndev) -{ - u32 status; - - /* Driver resets the NTB ModPhy lanes - magic! */ - writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6); - writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4); - writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4); - writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6); - - /* Driver waits 100ms to allow the NTB ModPhy to settle */ - msleep(100); - - /* Clear AER Errors, write to clear */ - status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET); - dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status); - status &= PCI_ERR_COR_REP_ROLL; - writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET); - - /* Clear unexpected electrical idle event in LTSSM, write to clear */ - status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET); - dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status); - status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI; - writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET); - - /* Clear DeSkew Buffer error, write to clear */ - status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET); - dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status); - status |= BWD_DESKEWSTS_DBERR; - writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET); - - status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status); - status &= BWD_IBIST_ERR_OFLOW; - writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - - /* Releases the NTB state machine to allow the link to retrain */ - status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); - dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status); - status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT; - writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); -} - -static void ntb_link_event(struct ntb_device *ndev, int link_state) -{ - unsigned int event; - - if (ndev->link_status == link_state) - return; - - if (link_state == NTB_LINK_UP) { - u16 status; - - dev_info(&ndev->pdev->dev, "Link Up\n"); - ndev->link_status = NTB_LINK_UP; - event = NTB_EVENT_HW_LINK_UP; - - if (is_ntb_atom(ndev) || - ndev->conn_type == NTB_CONN_TRANSPARENT) - status = readw(ndev->reg_ofs.lnk_stat); - else { - int rc = pci_read_config_word(ndev->pdev, - SNB_LINK_STATUS_OFFSET, - &status); - if (rc) - return; - } - - ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4; - ndev->link_speed = (status & NTB_LINK_SPEED_MASK); - dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n", - ndev->link_width, ndev->link_speed); - } else { - dev_info(&ndev->pdev->dev, "Link Down\n"); - ndev->link_status = NTB_LINK_DOWN; - event = NTB_EVENT_HW_LINK_DOWN; - /* Don't modify link width/speed, we need it in link recovery */ - } - - /* notify the upper layer if we have an event change */ - if (ndev->event_cb) - ndev->event_cb(ndev->ntb_transport, event); -} - -static int ntb_link_status(struct ntb_device *ndev) -{ - int link_state; - - if (is_ntb_atom(ndev)) { - u32 ntb_cntl; - - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - if (ntb_cntl & BWD_CNTL_LINK_DOWN) - link_state = NTB_LINK_DOWN; - else - link_state = NTB_LINK_UP; - } else { - u16 status; - int rc; - - rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET, - &status); - if (rc) - return rc; - - if (status & NTB_LINK_STATUS_ACTIVE) - link_state = NTB_LINK_UP; - else - link_state = NTB_LINK_DOWN; - } - - ntb_link_event(ndev, link_state); - - return 0; -} - -static void bwd_link_recovery(struct work_struct *work) -{ - struct ntb_device *ndev = container_of(work, struct ntb_device, - lr_timer.work); - u32 status32; - - bwd_recover_link(ndev); - /* There is a potential race between the 2 NTB devices recovering at the - * same time. If the times are the same, the link will not recover and - * the driver will be stuck in this loop forever. Add a random interval - * to the recovery time to prevent this race. - */ - msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME); - - status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET); - if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) - goto retry; - - status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET); - if (status32 & BWD_IBIST_ERR_OFLOW) - goto retry; - - status32 = readl(ndev->reg_ofs.lnk_cntl); - if (!(status32 & BWD_CNTL_LINK_DOWN)) { - unsigned char speed, width; - u16 status16; - - status16 = readw(ndev->reg_ofs.lnk_stat); - width = (status16 & NTB_LINK_WIDTH_MASK) >> 4; - speed = (status16 & NTB_LINK_SPEED_MASK); - if (ndev->link_width != width || ndev->link_speed != speed) - goto retry; - } - - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); - return; - -retry: - schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT); -} - -/* BWD doesn't have link status interrupt, poll on that platform */ -static void bwd_link_poll(struct work_struct *work) -{ - struct ntb_device *ndev = container_of(work, struct ntb_device, - hb_timer.work); - unsigned long ts = jiffies; - - /* If we haven't gotten an interrupt in a while, check the BWD link - * status bit - */ - if (ts > ndev->last_ts + NTB_HB_TIMEOUT) { - int rc = ntb_link_status(ndev); - if (rc) - dev_err(&ndev->pdev->dev, - "Error determining link status\n"); - - /* Check to see if a link error is the cause of the link down */ - if (ndev->link_status == NTB_LINK_DOWN) { - u32 status32 = readl(ndev->reg_base + - BWD_LTSSMSTATEJMP_OFFSET); - if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) { - schedule_delayed_work(&ndev->lr_timer, 0); - return; - } - } - } - - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); -} - -static int ntb_xeon_setup(struct ntb_device *ndev) -{ - switch (ndev->conn_type) { - case NTB_CONN_B2B: - ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET; - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET; - if (ndev->split_bar) - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_SBAR5XLAT_OFFSET; - ndev->limits.max_spads = SNB_MAX_B2B_SPADS; - - /* There is a Xeon hardware errata related to writes to - * SDOORBELL or B2BDOORBELL in conjunction with inbound access - * to NTB MMIO Space, which may hang the system. To workaround - * this use the second memory window to access the interrupt and - * scratch pad registers on the remote system. - */ - if (ndev->wa_flags & WA_SNB_ERR) { - if (!ndev->mw[ndev->limits.max_mw - 1].bar_sz) - return -EINVAL; - - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - ndev->reg_ofs.spad_write = - ndev->mw[ndev->limits.max_mw - 1].vbase + - SNB_SPAD_OFFSET; - ndev->reg_ofs.rdb = - ndev->mw[ndev->limits.max_mw - 1].vbase + - SNB_PDOORBELL_OFFSET; - - /* Set the Limit register to 4k, the minimum size, to - * prevent an illegal access - */ - writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base + - SNB_PBAR4LMT_OFFSET); - /* HW errata on the Limit registers. They can only be - * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on - * the driver defaults, but write the Limit registers - * first just in case. - */ - - ndev->limits.max_mw = SNB_ERRATA_MAX_MW; - } else { - /* HW Errata on bit 14 of b2bdoorbell register. Writes - * will not be mirrored to the remote system. Shrink - * the number of bits by one, since bit 14 is the last - * bit. - */ - ndev->limits.max_db_bits = SNB_MAX_DB_BITS - 1; - ndev->reg_ofs.spad_write = ndev->reg_base + - SNB_B2B_SPAD_OFFSET; - ndev->reg_ofs.rdb = ndev->reg_base + - SNB_B2B_DOORBELL_OFFSET; - - /* Disable the Limit register, just incase it is set to - * something silly. A 64bit write should handle it - * regardless of whether it has a split BAR or not. - */ - writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET); - /* HW errata on the Limit registers. They can only be - * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on - * the driver defaults, but write the Limit registers - * first just in case. - */ - if (ndev->split_bar) - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - else - ndev->limits.max_mw = SNB_MAX_MW; - } - - /* The Xeon errata workaround requires setting SBAR Base - * addresses to known values, so that the PBAR XLAT can be - * pointed at SBAR0 of the remote system. - */ - if (ndev->dev_type == NTB_DEV_USD) { - writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base + - SNB_PBAR2XLAT_OFFSET); - if (ndev->wa_flags & WA_SNB_ERR) - writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - else { - if (ndev->split_bar) { - writel(SNB_MBAR4_DSD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - writel(SNB_MBAR5_DSD_ADDR, - ndev->reg_base + - SNB_PBAR5XLAT_OFFSET); - } else - writeq(SNB_MBAR4_DSD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - - /* B2B_XLAT_OFFSET is a 64bit register, but can - * only take 32bit writes - */ - writel(SNB_MBAR01_DSD_ADDR & 0xffffffff, - ndev->reg_base + SNB_B2B_XLAT_OFFSETL); - writel(SNB_MBAR01_DSD_ADDR >> 32, - ndev->reg_base + SNB_B2B_XLAT_OFFSETU); - } - - writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base + - SNB_SBAR0BASE_OFFSET); - writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base + - SNB_SBAR2BASE_OFFSET); - if (ndev->split_bar) { - writel(SNB_MBAR4_USD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - writel(SNB_MBAR5_USD_ADDR, ndev->reg_base + - SNB_SBAR5BASE_OFFSET); - } else - writeq(SNB_MBAR4_USD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - } else { - writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base + - SNB_PBAR2XLAT_OFFSET); - if (ndev->wa_flags & WA_SNB_ERR) - writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - else { - if (ndev->split_bar) { - writel(SNB_MBAR4_USD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - writel(SNB_MBAR5_USD_ADDR, - ndev->reg_base + - SNB_PBAR5XLAT_OFFSET); - } else - writeq(SNB_MBAR4_USD_ADDR, - ndev->reg_base + - SNB_PBAR4XLAT_OFFSET); - - /* - * B2B_XLAT_OFFSET is a 64bit register, but can - * only take 32bit writes - */ - writel(SNB_MBAR01_USD_ADDR & 0xffffffff, - ndev->reg_base + SNB_B2B_XLAT_OFFSETL); - writel(SNB_MBAR01_USD_ADDR >> 32, - ndev->reg_base + SNB_B2B_XLAT_OFFSETU); - } - writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base + - SNB_SBAR0BASE_OFFSET); - writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base + - SNB_SBAR2BASE_OFFSET); - if (ndev->split_bar) { - writel(SNB_MBAR4_DSD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - writel(SNB_MBAR5_DSD_ADDR, ndev->reg_base + - SNB_SBAR5BASE_OFFSET); - } else - writeq(SNB_MBAR4_DSD_ADDR, ndev->reg_base + - SNB_SBAR4BASE_OFFSET); - - } - break; - case NTB_CONN_RP: - if (ndev->wa_flags & WA_SNB_ERR) { - dev_err(&ndev->pdev->dev, - "NTB-RP disabled due to hardware errata.\n"); - return -EINVAL; - } - - /* Scratch pads need to have exclusive access from the primary - * or secondary side. Halve the num spads so that each side can - * have an equal amount. - */ - ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2; - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - /* Note: The SDOORBELL is the cause of the errata. You REALLY - * don't want to touch it. - */ - ndev->reg_ofs.rdb = ndev->reg_base + SNB_SDOORBELL_OFFSET; - ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET; - /* Offset the start of the spads to correspond to whether it is - * primary or secondary - */ - ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET + - ndev->limits.max_spads * 4; - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET; - if (ndev->split_bar) { - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_SBAR5XLAT_OFFSET; - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - } else - ndev->limits.max_mw = SNB_MAX_MW; - break; - case NTB_CONN_TRANSPARENT: - if (ndev->wa_flags & WA_SNB_ERR) { - dev_err(&ndev->pdev->dev, - "NTB-TRANSPARENT disabled due to hardware errata.\n"); - return -EINVAL; - } - - /* Scratch pads need to have exclusive access from the primary - * or secondary side. Halve the num spads so that each side can - * have an equal amount. - */ - ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS / 2; - ndev->limits.max_db_bits = SNB_MAX_DB_BITS; - ndev->reg_ofs.rdb = ndev->reg_base + SNB_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb = ndev->reg_base + SNB_SDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_SDBMSK_OFFSET; - ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET; - /* Offset the start of the spads to correspond to whether it is - * primary or secondary - */ - ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET + - ndev->limits.max_spads * 4; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_PBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_PBAR4XLAT_OFFSET; - - if (ndev->split_bar) { - ndev->reg_ofs.bar5_xlat = - ndev->reg_base + SNB_PBAR5XLAT_OFFSET; - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - } else - ndev->limits.max_mw = SNB_MAX_MW; - break; - default: - /* - * we should never hit this. the detect function should've - * take cared of everything. - */ - return -EINVAL; - } - - ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET; - ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_SLINK_STATUS_OFFSET; - ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET; - - ndev->limits.msix_cnt = SNB_MSIX_CNT; - ndev->bits_per_vector = SNB_DB_BITS_PER_VEC; - - return 0; -} - -static int ntb_bwd_setup(struct ntb_device *ndev) -{ - int rc; - u32 val; - - ndev->hw_type = BWD_HW; - - rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val); - if (rc) - return rc; - - switch ((val & BWD_PPD_CONN_TYPE) >> 8) { - case NTB_CONN_B2B: - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - default: - dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n"); - return -EINVAL; - } - - if (val & BWD_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_DSD; - else - ndev->dev_type = NTB_DEV_USD; - - /* Initiate PCI-E link training */ - rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET, - val | BWD_PPD_INIT_LINK); - if (rc) - return rc; - - ndev->reg_ofs.ldb = ndev->reg_base + BWD_PDOORBELL_OFFSET; - ndev->reg_ofs.ldb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET; - ndev->reg_ofs.rdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET; - ndev->reg_ofs.bar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET; - ndev->reg_ofs.bar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET; - ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET; - ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET; - ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET; - ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET; - ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET; - ndev->limits.max_mw = BWD_MAX_MW; - ndev->limits.max_spads = BWD_MAX_SPADS; - ndev->limits.max_db_bits = BWD_MAX_DB_BITS; - ndev->limits.msix_cnt = BWD_MSIX_CNT; - ndev->bits_per_vector = BWD_DB_BITS_PER_VEC; - - /* Since bwd doesn't have a link interrupt, setup a poll timer */ - INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll); - INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery); - schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); - - return 0; -} - -static int ntb_device_setup(struct ntb_device *ndev) -{ - int rc; - - if (is_ntb_xeon(ndev)) - rc = ntb_xeon_setup(ndev); - else if (is_ntb_atom(ndev)) - rc = ntb_bwd_setup(ndev); - else - rc = -ENODEV; - - if (rc) - return rc; - - if (ndev->conn_type == NTB_CONN_B2B) - /* Enable Bus Master and Memory Space on the secondary side */ - writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, - ndev->reg_ofs.spci_cmd); - - return 0; -} - -static void ntb_device_free(struct ntb_device *ndev) -{ - if (is_ntb_atom(ndev)) { - cancel_delayed_work_sync(&ndev->hb_timer); - cancel_delayed_work_sync(&ndev->lr_timer); - } -} - -static irqreturn_t bwd_callback_msix_irq(int irq, void *data) -{ - struct ntb_db_cb *db_cb = data; - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq, - db_cb->db_num); - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_schedule(&db_cb->irq_work); - - /* No need to check for the specific HB irq, any interrupt means - * we're connected. - */ - ndev->last_ts = jiffies; - - writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -static irqreturn_t xeon_callback_msix_irq(int irq, void *data) -{ - struct ntb_db_cb *db_cb = data; - struct ntb_device *ndev = db_cb->ndev; - unsigned long mask; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq, - db_cb->db_num); - - mask = readw(ndev->reg_ofs.ldb_mask); - set_bit(db_cb->db_num * ndev->bits_per_vector, &mask); - writew(mask, ndev->reg_ofs.ldb_mask); - - tasklet_schedule(&db_cb->irq_work); - - /* On Sandybridge, there are 16 bits in the interrupt register - * but only 4 vectors. So, 5 bits are assigned to the first 3 - * vectors, with the 4th having a single bit for link - * interrupts. - */ - writew(((1 << ndev->bits_per_vector) - 1) << - (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -/* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */ -static irqreturn_t xeon_event_msix_irq(int irq, void *dev) -{ - struct ntb_device *ndev = dev; - int rc; - - dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq); - - rc = ntb_link_status(ndev); - if (rc) - dev_err(&ndev->pdev->dev, "Error determining link status\n"); - - /* bit 15 is always the link bit */ - writew(1 << SNB_LINK_DB, ndev->reg_ofs.ldb); - - return IRQ_HANDLED; -} - -static irqreturn_t ntb_interrupt(int irq, void *dev) -{ - struct ntb_device *ndev = dev; - unsigned int i = 0; - - if (is_ntb_atom(ndev)) { - u64 ldb = readq(ndev->reg_ofs.ldb); - - dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %Lx\n", irq, ldb); - - while (ldb) { - i = __ffs(ldb); - ldb &= ldb - 1; - bwd_callback_msix_irq(irq, &ndev->db_cb[i]); - } - } else { - u16 ldb = readw(ndev->reg_ofs.ldb); - - dev_dbg(&ndev->pdev->dev, "irq %d - ldb = %x\n", irq, ldb); - - if (ldb & SNB_DB_HW_LINK) { - xeon_event_msix_irq(irq, dev); - ldb &= ~SNB_DB_HW_LINK; - } - - while (ldb) { - i = __ffs(ldb); - ldb &= ldb - 1; - xeon_callback_msix_irq(irq, &ndev->db_cb[i]); - } - } - - return IRQ_HANDLED; -} - -static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries) -{ - struct pci_dev *pdev = ndev->pdev; - struct msix_entry *msix; - int rc, i; - - if (msix_entries < ndev->limits.msix_cnt) - return -ENOSPC; - - rc = pci_enable_msix_exact(pdev, ndev->msix_entries, msix_entries); - if (rc < 0) - return rc; - - for (i = 0; i < msix_entries; i++) { - msix = &ndev->msix_entries[i]; - WARN_ON(!msix->vector); - - if (i == msix_entries - 1) { - rc = request_irq(msix->vector, - xeon_event_msix_irq, 0, - "ntb-event-msix", ndev); - if (rc) - goto err; - } else { - rc = request_irq(msix->vector, - xeon_callback_msix_irq, 0, - "ntb-callback-msix", - &ndev->db_cb[i]); - if (rc) - goto err; - } - } - - ndev->num_msix = msix_entries; - ndev->max_cbs = msix_entries - 1; - - return 0; - -err: - while (--i >= 0) { - /* Code never reaches here for entry nr 'ndev->num_msix - 1' */ - msix = &ndev->msix_entries[i]; - free_irq(msix->vector, &ndev->db_cb[i]); - } - - pci_disable_msix(pdev); - ndev->num_msix = 0; - - return rc; -} - -static int ntb_setup_bwd_msix(struct ntb_device *ndev, int msix_entries) -{ - struct pci_dev *pdev = ndev->pdev; - struct msix_entry *msix; - int rc, i; - - msix_entries = pci_enable_msix_range(pdev, ndev->msix_entries, - 1, msix_entries); - if (msix_entries < 0) - return msix_entries; - - for (i = 0; i < msix_entries; i++) { - msix = &ndev->msix_entries[i]; - WARN_ON(!msix->vector); - - rc = request_irq(msix->vector, bwd_callback_msix_irq, 0, - "ntb-callback-msix", &ndev->db_cb[i]); - if (rc) - goto err; - } - - ndev->num_msix = msix_entries; - ndev->max_cbs = msix_entries; - - return 0; - -err: - while (--i >= 0) - free_irq(msix->vector, &ndev->db_cb[i]); - - pci_disable_msix(pdev); - ndev->num_msix = 0; - - return rc; -} - -static int ntb_setup_msix(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int msix_entries; - int rc, i; - - msix_entries = pci_msix_vec_count(pdev); - if (msix_entries < 0) { - rc = msix_entries; - goto err; - } else if (msix_entries > ndev->limits.msix_cnt) { - rc = -EINVAL; - goto err; - } - - ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries, - GFP_KERNEL); - if (!ndev->msix_entries) { - rc = -ENOMEM; - goto err; - } - - for (i = 0; i < msix_entries; i++) - ndev->msix_entries[i].entry = i; - - if (is_ntb_atom(ndev)) - rc = ntb_setup_bwd_msix(ndev, msix_entries); - else - rc = ntb_setup_snb_msix(ndev, msix_entries); - if (rc) - goto err1; - - return 0; - -err1: - kfree(ndev->msix_entries); -err: - dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n"); - return rc; -} - -static int ntb_setup_msi(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int rc; - - rc = pci_enable_msi(pdev); - if (rc) - return rc; - - rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev); - if (rc) { - pci_disable_msi(pdev); - dev_err(&pdev->dev, "Error allocating MSI interrupt\n"); - return rc; - } - - return 0; -} - -static int ntb_setup_intx(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - int rc; - - /* Verify intx is enabled */ - pci_intx(pdev, 1); - - rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx", - ndev); - if (rc) - return rc; - - return 0; -} - -static int ntb_setup_interrupts(struct ntb_device *ndev) -{ - int rc; - - /* On BWD, disable all interrupts. On SNB, disable all but Link - * Interrupt. The rest will be unmasked as callbacks are registered. - */ - if (is_ntb_atom(ndev)) - writeq(~0, ndev->reg_ofs.ldb_mask); - else { - u16 var = 1 << SNB_LINK_DB; - writew(~var, ndev->reg_ofs.ldb_mask); - } - - rc = ntb_setup_msix(ndev); - if (!rc) - goto done; - - ndev->bits_per_vector = 1; - ndev->max_cbs = ndev->limits.max_db_bits; - - rc = ntb_setup_msi(ndev); - if (!rc) - goto done; - - rc = ntb_setup_intx(ndev); - if (rc) { - dev_err(&ndev->pdev->dev, "no usable interrupts\n"); - return rc; - } - -done: - return 0; -} - -static void ntb_free_interrupts(struct ntb_device *ndev) -{ - struct pci_dev *pdev = ndev->pdev; - - /* mask interrupts */ - if (is_ntb_atom(ndev)) - writeq(~0, ndev->reg_ofs.ldb_mask); - else - writew(~0, ndev->reg_ofs.ldb_mask); - - if (ndev->num_msix) { - struct msix_entry *msix; - u32 i; - - for (i = 0; i < ndev->num_msix; i++) { - msix = &ndev->msix_entries[i]; - if (is_ntb_xeon(ndev) && i == ndev->num_msix - 1) - free_irq(msix->vector, ndev); - else - free_irq(msix->vector, &ndev->db_cb[i]); - } - pci_disable_msix(pdev); - kfree(ndev->msix_entries); - } else { - free_irq(pdev->irq, ndev); - - if (pci_dev_msi_enabled(pdev)) - pci_disable_msi(pdev); - } -} - -static int ntb_create_callbacks(struct ntb_device *ndev) -{ - int i; - - /* Chicken-egg issue. We won't know how many callbacks are necessary - * until we see how many MSI-X vectors we get, but these pointers need - * to be passed into the MSI-X register function. So, we allocate the - * max, knowing that they might not all be used, to work around this. - */ - ndev->db_cb = kcalloc(ndev->limits.max_db_bits, - sizeof(struct ntb_db_cb), - GFP_KERNEL); - if (!ndev->db_cb) - return -ENOMEM; - - for (i = 0; i < ndev->limits.max_db_bits; i++) { - ndev->db_cb[i].db_num = i; - ndev->db_cb[i].ndev = ndev; - } - - return 0; -} - -static void ntb_free_callbacks(struct ntb_device *ndev) -{ - int i; - - for (i = 0; i < ndev->limits.max_db_bits; i++) - ntb_unregister_db_callback(ndev, i); - - kfree(ndev->db_cb); -} - -static ssize_t ntb_debugfs_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *offp) -{ - struct ntb_device *ndev; - char *buf; - ssize_t ret, offset, out_count; - - out_count = 500; - - buf = kmalloc(out_count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ndev = filp->private_data; - offset = 0; - offset += snprintf(buf + offset, out_count - offset, - "NTB Device Information:\n"); - offset += snprintf(buf + offset, out_count - offset, - "Connection Type - \t\t%s\n", - ndev->conn_type == NTB_CONN_TRANSPARENT ? - "Transparent" : (ndev->conn_type == NTB_CONN_B2B) ? - "Back to back" : "Root Port"); - offset += snprintf(buf + offset, out_count - offset, - "Device Type - \t\t\t%s\n", - ndev->dev_type == NTB_DEV_USD ? - "DSD/USP" : "USD/DSP"); - offset += snprintf(buf + offset, out_count - offset, - "Max Number of Callbacks - \t%u\n", - ntb_max_cbs(ndev)); - offset += snprintf(buf + offset, out_count - offset, - "Link Status - \t\t\t%s\n", - ntb_hw_link_status(ndev) ? "Up" : "Down"); - if (ntb_hw_link_status(ndev)) { - offset += snprintf(buf + offset, out_count - offset, - "Link Speed - \t\t\tPCI-E Gen %u\n", - ndev->link_speed); - offset += snprintf(buf + offset, out_count - offset, - "Link Width - \t\t\tx%u\n", - ndev->link_width); - } - - if (is_ntb_xeon(ndev)) { - u32 status32; - u16 status16; - int rc; - - offset += snprintf(buf + offset, out_count - offset, - "\nNTB Device Statistics:\n"); - offset += snprintf(buf + offset, out_count - offset, - "Upstream Memory Miss - \t%u\n", - readw(ndev->reg_base + - SNB_USMEMMISS_OFFSET)); - - offset += snprintf(buf + offset, out_count - offset, - "\nNTB Hardware Errors:\n"); - - rc = pci_read_config_word(ndev->pdev, SNB_DEVSTS_OFFSET, - &status16); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "DEVSTS - \t%#06x\n", status16); - - rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET, - &status16); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "LNKSTS - \t%#06x\n", status16); - - rc = pci_read_config_dword(ndev->pdev, SNB_UNCERRSTS_OFFSET, - &status32); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "UNCERRSTS - \t%#010x\n", status32); - - rc = pci_read_config_dword(ndev->pdev, SNB_CORERRSTS_OFFSET, - &status32); - if (!rc) - offset += snprintf(buf + offset, out_count - offset, - "CORERRSTS - \t%#010x\n", status32); - } - - if (offset > out_count) - offset = out_count; - - ret = simple_read_from_buffer(ubuf, count, offp, buf, offset); - kfree(buf); - return ret; -} - -static const struct file_operations ntb_debugfs_info = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ntb_debugfs_read, -}; - -static void ntb_setup_debugfs(struct ntb_device *ndev) -{ - if (!debugfs_initialized()) - return; - - if (!debugfs_dir) - debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - - ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->pdev), - debugfs_dir); - if (ndev->debugfs_dir) - ndev->debugfs_info = debugfs_create_file("info", S_IRUSR, - ndev->debugfs_dir, - ndev, - &ntb_debugfs_info); -} - -static void ntb_free_debugfs(struct ntb_device *ndev) -{ - debugfs_remove_recursive(ndev->debugfs_dir); - - if (debugfs_dir && simple_empty(debugfs_dir)) { - debugfs_remove_recursive(debugfs_dir); - debugfs_dir = NULL; - } -} - -static void ntb_hw_link_up(struct ntb_device *ndev) -{ - if (ndev->conn_type == NTB_CONN_TRANSPARENT) - ntb_link_event(ndev, NTB_LINK_UP); - else { - u32 ntb_cntl; - - /* Let's bring the NTB link up */ - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - ntb_cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); - ntb_cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; - ntb_cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; - if (ndev->split_bar) - ntb_cntl |= NTB_CNTL_P2S_BAR5_SNOOP | - NTB_CNTL_S2P_BAR5_SNOOP; - - writel(ntb_cntl, ndev->reg_ofs.lnk_cntl); - } -} - -static void ntb_hw_link_down(struct ntb_device *ndev) -{ - u32 ntb_cntl; - - if (ndev->conn_type == NTB_CONN_TRANSPARENT) { - ntb_link_event(ndev, NTB_LINK_DOWN); - return; - } - - /* Bring NTB link down */ - ntb_cntl = readl(ndev->reg_ofs.lnk_cntl); - ntb_cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); - ntb_cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); - if (ndev->split_bar) - ntb_cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | - NTB_CNTL_S2P_BAR5_SNOOP); - ntb_cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; - writel(ntb_cntl, ndev->reg_ofs.lnk_cntl); -} - -static void ntb_max_mw_detect(struct ntb_device *ndev) -{ - if (ndev->split_bar) - ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW; - else - ndev->limits.max_mw = SNB_MAX_MW; -} - -static int ntb_xeon_detect(struct ntb_device *ndev) -{ - int rc, bars_mask; - u32 bars; - u8 ppd; - - ndev->hw_type = SNB_HW; - - rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &ppd); - if (rc) - return -EIO; - - if (ppd & SNB_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_USD; - else - ndev->dev_type = NTB_DEV_DSD; - - ndev->split_bar = (ppd & SNB_PPD_SPLIT_BAR) ? 1 : 0; - - switch (ppd & SNB_PPD_CONN_TYPE) { - case NTB_CONN_B2B: - dev_info(&ndev->pdev->dev, "Conn Type = B2B\n"); - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - dev_info(&ndev->pdev->dev, "Conn Type = RP\n"); - ndev->conn_type = NTB_CONN_RP; - break; - case NTB_CONN_TRANSPARENT: - dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n"); - ndev->conn_type = NTB_CONN_TRANSPARENT; - /* - * This mode is default to USD/DSP. HW does not report - * properly in transparent mode as it has no knowledge of - * NTB. We will just force correct here. - */ - ndev->dev_type = NTB_DEV_USD; - - /* - * This is a way for transparent BAR to figure out if we - * are doing split BAR or not. There is no way for the hw - * on the transparent side to know and set the PPD. - */ - bars_mask = pci_select_bars(ndev->pdev, IORESOURCE_MEM); - bars = hweight32(bars_mask); - if (bars == (HSX_SPLITBAR_MAX_MW + 1)) - ndev->split_bar = 1; - - break; - default: - dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", ppd); - return -ENODEV; - } - - ntb_max_mw_detect(ndev); - - return 0; -} - -static int ntb_atom_detect(struct ntb_device *ndev) -{ - int rc; - u32 ppd; - - ndev->hw_type = BWD_HW; - ndev->limits.max_mw = BWD_MAX_MW; - - rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd); - if (rc) - return rc; - - switch ((ppd & BWD_PPD_CONN_TYPE) >> 8) { - case NTB_CONN_B2B: - dev_info(&ndev->pdev->dev, "Conn Type = B2B\n"); - ndev->conn_type = NTB_CONN_B2B; - break; - case NTB_CONN_RP: - default: - dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n"); - return -EINVAL; - } - - if (ppd & BWD_PPD_DEV_TYPE) - ndev->dev_type = NTB_DEV_DSD; - else - ndev->dev_type = NTB_DEV_USD; - - return 0; -} - -static int ntb_device_detect(struct ntb_device *ndev) -{ - int rc; - - if (is_ntb_xeon(ndev)) - rc = ntb_xeon_detect(ndev); - else if (is_ntb_atom(ndev)) - rc = ntb_atom_detect(ndev); - else - rc = -ENODEV; - - dev_info(&ndev->pdev->dev, "Device Type = %s\n", - ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP"); - - return 0; -} - -static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct ntb_device *ndev; - int rc, i; - - ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL); - if (!ndev) - return -ENOMEM; - - ndev->pdev = pdev; - - ntb_set_errata_flags(ndev); - - ndev->link_status = NTB_LINK_DOWN; - pci_set_drvdata(pdev, ndev); - ntb_setup_debugfs(ndev); - - rc = pci_enable_device(pdev); - if (rc) - goto err; - - pci_set_master(ndev->pdev); - - rc = ntb_device_detect(ndev); - if (rc) - goto err; - - ndev->mw = kcalloc(ndev->limits.max_mw, sizeof(struct ntb_mw), - GFP_KERNEL); - if (!ndev->mw) { - rc = -ENOMEM; - goto err1; - } - - if (ndev->split_bar) - rc = pci_request_selected_regions(pdev, NTB_SPLITBAR_MASK, - KBUILD_MODNAME); - else - rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, - KBUILD_MODNAME); - - if (rc) - goto err2; - - ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO); - if (!ndev->reg_base) { - dev_warn(&pdev->dev, "Cannot remap BAR 0\n"); - rc = -EIO; - goto err3; - } - - for (i = 0; i < ndev->limits.max_mw; i++) { - ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i)); - - /* - * with the errata we need to steal last of the memory - * windows for workarounds and they point to MMIO registers. - */ - if ((ndev->wa_flags & WA_SNB_ERR) && - (i == (ndev->limits.max_mw - 1))) { - ndev->mw[i].vbase = - ioremap_nocache(pci_resource_start(pdev, - MW_TO_BAR(i)), - ndev->mw[i].bar_sz); - } else { - ndev->mw[i].vbase = - ioremap_wc(pci_resource_start(pdev, - MW_TO_BAR(i)), - ndev->mw[i].bar_sz); - } - - dev_info(&pdev->dev, "MW %d size %llu\n", i, - (unsigned long long) ndev->mw[i].bar_sz); - if (!ndev->mw[i].vbase) { - dev_warn(&pdev->dev, "Cannot remap BAR %d\n", - MW_TO_BAR(i)); - rc = -EIO; - goto err4; - } - } - - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - goto err4; - - dev_warn(&pdev->dev, "Cannot DMA highmem\n"); - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (rc) { - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc) - goto err4; - - dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); - } - - rc = ntb_device_setup(ndev); - if (rc) - goto err4; - - rc = ntb_create_callbacks(ndev); - if (rc) - goto err5; - - rc = ntb_setup_interrupts(ndev); - if (rc) - goto err6; - - /* The scratchpad registers keep the values between rmmod/insmod, - * blast them now - */ - for (i = 0; i < ndev->limits.max_spads; i++) { - ntb_write_local_spad(ndev, i, 0); - ntb_write_remote_spad(ndev, i, 0); - } - - rc = ntb_transport_init(pdev); - if (rc) - goto err7; - - ntb_hw_link_up(ndev); - - return 0; - -err7: - ntb_free_interrupts(ndev); -err6: - ntb_free_callbacks(ndev); -err5: - ntb_device_free(ndev); -err4: - for (i--; i >= 0; i--) - iounmap(ndev->mw[i].vbase); - iounmap(ndev->reg_base); -err3: - if (ndev->split_bar) - pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK); - else - pci_release_selected_regions(pdev, NTB_BAR_MASK); -err2: - kfree(ndev->mw); -err1: - pci_disable_device(pdev); -err: - ntb_free_debugfs(ndev); - kfree(ndev); - - dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME); - return rc; -} - -static void ntb_pci_remove(struct pci_dev *pdev) -{ - struct ntb_device *ndev = pci_get_drvdata(pdev); - int i; - - ntb_hw_link_down(ndev); - - ntb_transport_free(ndev->ntb_transport); - - ntb_free_interrupts(ndev); - ntb_free_callbacks(ndev); - ntb_device_free(ndev); - - /* need to reset max_mw limits so we can unmap properly */ - if (ndev->hw_type == SNB_HW) - ntb_max_mw_detect(ndev); - - for (i = 0; i < ndev->limits.max_mw; i++) - iounmap(ndev->mw[i].vbase); - - kfree(ndev->mw); - iounmap(ndev->reg_base); - if (ndev->split_bar) - pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK); - else - pci_release_selected_regions(pdev, NTB_BAR_MASK); - pci_disable_device(pdev); - ntb_free_debugfs(ndev); - kfree(ndev); -} - -static struct pci_driver ntb_pci_driver = { - .name = KBUILD_MODNAME, - .id_table = ntb_pci_tbl, - .probe = ntb_pci_probe, - .remove = ntb_pci_remove, -}; - -module_pci_driver(ntb_pci_driver); diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h deleted file mode 100644 index 96de5fc95f90..000000000000 --- a/drivers/ntb/ntb_hw.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason <jon.mason@intel.com> - */ -#include <linux/ntb.h> - -#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 -#define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 -#define PCI_DEVICE_ID_INTEL_NTB_SS_JSF 0x3727 -#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB 0x3C0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_SNB 0x3C0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_SNB 0x3C0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_IVT 0x0E0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_IVT 0x0E0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_IVT 0x0E0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX 0x2F0D -#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E -#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E - -#ifndef readq -static inline u64 readq(void __iomem *addr) -{ - return readl(addr) | (((u64) readl(addr + 4)) << 32LL); -} -#endif - -#ifndef writeq -static inline void writeq(u64 val, void __iomem *addr) -{ - writel(val & 0xffffffff, addr); - writel(val >> 32, addr + 4); -} -#endif - -#define NTB_BAR_MMIO 0 -#define NTB_BAR_23 2 -#define NTB_BAR_4 4 -#define NTB_BAR_5 5 - -#define NTB_BAR_MASK ((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\ - (1 << NTB_BAR_4)) -#define NTB_SPLITBAR_MASK ((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\ - (1 << NTB_BAR_4) | (1 << NTB_BAR_5)) - -#define NTB_HB_TIMEOUT msecs_to_jiffies(1000) - -enum ntb_hw_event { - NTB_EVENT_SW_EVENT0 = 0, - NTB_EVENT_SW_EVENT1, - NTB_EVENT_SW_EVENT2, - NTB_EVENT_HW_ERROR, - NTB_EVENT_HW_LINK_UP, - NTB_EVENT_HW_LINK_DOWN, -}; - -struct ntb_mw { - dma_addr_t phys_addr; - void __iomem *vbase; - resource_size_t bar_sz; -}; - -struct ntb_db_cb { - int (*callback)(void *data, int db_num); - unsigned int db_num; - void *data; - struct ntb_device *ndev; - struct tasklet_struct irq_work; -}; - -#define WA_SNB_ERR 0x00000001 - -struct ntb_device { - struct pci_dev *pdev; - struct msix_entry *msix_entries; - void __iomem *reg_base; - struct ntb_mw *mw; - struct { - unsigned char max_mw; - unsigned char max_spads; - unsigned char max_db_bits; - unsigned char msix_cnt; - } limits; - struct { - void __iomem *ldb; - void __iomem *ldb_mask; - void __iomem *rdb; - void __iomem *bar2_xlat; - void __iomem *bar4_xlat; - void __iomem *bar5_xlat; - void __iomem *spad_write; - void __iomem *spad_read; - void __iomem *lnk_cntl; - void __iomem *lnk_stat; - void __iomem *spci_cmd; - } reg_ofs; - struct ntb_transport *ntb_transport; - void (*event_cb)(void *handle, enum ntb_hw_event event); - - struct ntb_db_cb *db_cb; - unsigned char hw_type; - unsigned char conn_type; - unsigned char dev_type; - unsigned char num_msix; - unsigned char bits_per_vector; - unsigned char max_cbs; - unsigned char link_width; - unsigned char link_speed; - unsigned char link_status; - unsigned char split_bar; - - struct delayed_work hb_timer; - unsigned long last_ts; - - struct delayed_work lr_timer; - - struct dentry *debugfs_dir; - struct dentry *debugfs_info; - - unsigned int wa_flags; -}; - -/** - * ntb_max_cbs() - return the max callbacks - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the maximum number of callbacks - * - * RETURNS: the maximum number of callbacks - */ -static inline unsigned char ntb_max_cbs(struct ntb_device *ndev) -{ - return ndev->max_cbs; -} - -/** - * ntb_max_mw() - return the max number of memory windows - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the maximum number of memory windows - * - * RETURNS: the maximum number of memory windows - */ -static inline unsigned char ntb_max_mw(struct ntb_device *ndev) -{ - return ndev->limits.max_mw; -} - -/** - * ntb_hw_link_status() - return the hardware link status - * @ndev: pointer to ntb_device instance - * - * Returns true if the hardware is connected to the remote system - * - * RETURNS: true or false based on the hardware link state - */ -static inline bool ntb_hw_link_status(struct ntb_device *ndev) -{ - return ndev->link_status == NTB_LINK_UP; -} - -/** - * ntb_query_pdev() - return the pci_dev pointer - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the pci_dev pointer for the NTB hardware device - * - * RETURNS: a pointer to the ntb pci_dev - */ -static inline struct pci_dev *ntb_query_pdev(struct ntb_device *ndev) -{ - return ndev->pdev; -} - -/** - * ntb_query_debugfs() - return the debugfs pointer - * @ndev: pointer to ntb_device instance - * - * Given the ntb pointer, return the debugfs directory pointer for the NTB - * hardware device - * - * RETURNS: a pointer to the debugfs directory - */ -static inline struct dentry *ntb_query_debugfs(struct ntb_device *ndev) -{ - return ndev->debugfs_dir; -} - -struct ntb_device *ntb_register_transport(struct pci_dev *pdev, - void *transport); -void ntb_unregister_transport(struct ntb_device *ndev); -void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr); -int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx, - void *data, int (*db_cb_func)(void *data, - int db_num)); -void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx); -int ntb_register_event_callback(struct ntb_device *ndev, - void (*event_cb_func)(void *handle, - enum ntb_hw_event event)); -void ntb_unregister_event_callback(struct ntb_device *ndev); -int ntb_get_max_spads(struct ntb_device *ndev); -int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val); -int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); -int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val); -int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val); -resource_size_t ntb_get_mw_base(struct ntb_device *ndev, unsigned int mw); -void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw); -u64 ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw); -void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int idx); -void *ntb_find_transport(struct pci_dev *pdev); - -int ntb_transport_init(struct pci_dev *pdev); -void ntb_transport_free(void *transport); diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h deleted file mode 100644 index f028ff81fd77..000000000000 --- a/drivers/ntb/ntb_regs.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason <jon.mason@intel.com> - */ - -#define NTB_LINK_STATUS_ACTIVE 0x2000 -#define NTB_LINK_SPEED_MASK 0x000f -#define NTB_LINK_WIDTH_MASK 0x03f0 - -#define SNB_MSIX_CNT 4 -#define SNB_MAX_B2B_SPADS 16 -#define SNB_MAX_COMPAT_SPADS 16 -/* Reserve the uppermost bit for link interrupt */ -#define SNB_MAX_DB_BITS 15 -#define SNB_LINK_DB 15 -#define SNB_DB_BITS_PER_VEC 5 -#define HSX_SPLITBAR_MAX_MW 3 -#define SNB_MAX_MW 2 -#define SNB_ERRATA_MAX_MW 1 - -#define SNB_DB_HW_LINK 0x8000 - -#define SNB_UNCERRSTS_OFFSET 0x014C -#define SNB_CORERRSTS_OFFSET 0x0158 -#define SNB_LINK_STATUS_OFFSET 0x01A2 -#define SNB_PCICMD_OFFSET 0x0504 -#define SNB_DEVCTRL_OFFSET 0x0598 -#define SNB_DEVSTS_OFFSET 0x059A -#define SNB_SLINK_STATUS_OFFSET 0x05A2 - -#define SNB_PBAR2LMT_OFFSET 0x0000 -#define SNB_PBAR4LMT_OFFSET 0x0008 -#define SNB_PBAR5LMT_OFFSET 0x000C -#define SNB_PBAR2XLAT_OFFSET 0x0010 -#define SNB_PBAR4XLAT_OFFSET 0x0018 -#define SNB_PBAR5XLAT_OFFSET 0x001C -#define SNB_SBAR2LMT_OFFSET 0x0020 -#define SNB_SBAR4LMT_OFFSET 0x0028 -#define SNB_SBAR5LMT_OFFSET 0x002C -#define SNB_SBAR2XLAT_OFFSET 0x0030 -#define SNB_SBAR4XLAT_OFFSET 0x0038 -#define SNB_SBAR5XLAT_OFFSET 0x003C -#define SNB_SBAR0BASE_OFFSET 0x0040 -#define SNB_SBAR2BASE_OFFSET 0x0048 -#define SNB_SBAR4BASE_OFFSET 0x0050 -#define SNB_SBAR5BASE_OFFSET 0x0054 -#define SNB_NTBCNTL_OFFSET 0x0058 -#define SNB_SBDF_OFFSET 0x005C -#define SNB_PDOORBELL_OFFSET 0x0060 -#define SNB_PDBMSK_OFFSET 0x0062 -#define SNB_SDOORBELL_OFFSET 0x0064 -#define SNB_SDBMSK_OFFSET 0x0066 -#define SNB_USMEMMISS_OFFSET 0x0070 -#define SNB_SPAD_OFFSET 0x0080 -#define SNB_SPADSEMA4_OFFSET 0x00c0 -#define SNB_WCCNTRL_OFFSET 0x00e0 -#define SNB_B2B_SPAD_OFFSET 0x0100 -#define SNB_B2B_DOORBELL_OFFSET 0x0140 -#define SNB_B2B_XLAT_OFFSETL 0x0144 -#define SNB_B2B_XLAT_OFFSETU 0x0148 - -/* - * The addresses are setup so the 32bit BARs can function. Thus - * the addresses are all in 32bit space - */ -#define SNB_MBAR01_USD_ADDR 0x000000002100000CULL -#define SNB_MBAR23_USD_ADDR 0x000000004100000CULL -#define SNB_MBAR4_USD_ADDR 0x000000008100000CULL -#define SNB_MBAR5_USD_ADDR 0x00000000A100000CULL -#define SNB_MBAR01_DSD_ADDR 0x000000002000000CULL -#define SNB_MBAR23_DSD_ADDR 0x000000004000000CULL -#define SNB_MBAR4_DSD_ADDR 0x000000008000000CULL -#define SNB_MBAR5_DSD_ADDR 0x00000000A000000CULL - -#define BWD_MSIX_CNT 34 -#define BWD_MAX_SPADS 16 -#define BWD_MAX_DB_BITS 34 -#define BWD_DB_BITS_PER_VEC 1 -#define BWD_MAX_MW 2 - -#define BWD_PCICMD_OFFSET 0xb004 -#define BWD_MBAR23_OFFSET 0xb018 -#define BWD_MBAR45_OFFSET 0xb020 -#define BWD_DEVCTRL_OFFSET 0xb048 -#define BWD_LINK_STATUS_OFFSET 0xb052 -#define BWD_ERRCORSTS_OFFSET 0xb110 - -#define BWD_SBAR2XLAT_OFFSET 0x0008 -#define BWD_SBAR4XLAT_OFFSET 0x0010 -#define BWD_PDOORBELL_OFFSET 0x0020 -#define BWD_PDBMSK_OFFSET 0x0028 -#define BWD_NTBCNTL_OFFSET 0x0060 -#define BWD_EBDF_OFFSET 0x0064 -#define BWD_SPAD_OFFSET 0x0080 -#define BWD_SPADSEMA_OFFSET 0x00c0 -#define BWD_STKYSPAD_OFFSET 0x00c4 -#define BWD_PBAR2XLAT_OFFSET 0x8008 -#define BWD_PBAR4XLAT_OFFSET 0x8010 -#define BWD_B2B_DOORBELL_OFFSET 0x8020 -#define BWD_B2B_SPAD_OFFSET 0x8080 -#define BWD_B2B_SPADSEMA_OFFSET 0x80c0 -#define BWD_B2B_STKYSPAD_OFFSET 0x80c4 - -#define BWD_MODPHY_PCSREG4 0x1c004 -#define BWD_MODPHY_PCSREG6 0x1c006 - -#define BWD_IP_BASE 0xC000 -#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024) -#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180) -#define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040) -#define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324) - -#define BWD_DESKEWSTS_DBERR (1 << 15) -#define BWD_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20) -#define BWD_LTSSMSTATEJMP_FORCEDETECT (1 << 2) -#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF - -#define NTB_CNTL_CFG_LOCK (1 << 0) -#define NTB_CNTL_LINK_DISABLE (1 << 1) -#define NTB_CNTL_S2P_BAR23_SNOOP (1 << 2) -#define NTB_CNTL_P2S_BAR23_SNOOP (1 << 4) -#define NTB_CNTL_S2P_BAR4_SNOOP (1 << 6) -#define NTB_CNTL_P2S_BAR4_SNOOP (1 << 8) -#define NTB_CNTL_S2P_BAR5_SNOOP (1 << 12) -#define NTB_CNTL_P2S_BAR5_SNOOP (1 << 14) -#define BWD_CNTL_LINK_DOWN (1 << 16) - -#define NTB_PPD_OFFSET 0x00D4 -#define SNB_PPD_CONN_TYPE 0x0003 -#define SNB_PPD_DEV_TYPE 0x0010 -#define SNB_PPD_SPLIT_BAR (1 << 6) -#define BWD_PPD_INIT_LINK 0x0008 -#define BWD_PPD_CONN_TYPE 0x0300 -#define BWD_PPD_DEV_TYPE 0x1000 diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index e9bf2f47b61a..efe3ad4122f2 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason <jon.mason@intel.com> @@ -56,11 +58,25 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/types.h> -#include "ntb_hw.h" +#include <linux/uaccess.h> +#include "linux/ntb.h" +#include "linux/ntb_transport.h" -#define NTB_TRANSPORT_VERSION 3 +#define NTB_TRANSPORT_VERSION 4 +#define NTB_TRANSPORT_VER "4" +#define NTB_TRANSPORT_NAME "ntb_transport" +#define NTB_TRANSPORT_DESC "Software Queue-Pair Transport over NTB" -static unsigned int transport_mtu = 0x401E; +MODULE_DESCRIPTION(NTB_TRANSPORT_DESC); +MODULE_VERSION(NTB_TRANSPORT_VER); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + +static unsigned int transport_mtu = 0x10000; module_param(transport_mtu, uint, 0644); MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets"); @@ -72,10 +88,16 @@ static unsigned int copy_bytes = 1024; module_param(copy_bytes, uint, 0644); MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA"); +static bool use_dma; +module_param(use_dma, bool, 0644); +MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); + +static struct dentry *nt_debugfs_dir; + struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; - /* pointers to data to be transfered */ + /* pointers to data to be transferred */ void *cb_data; void *buf; unsigned int len; @@ -94,14 +116,16 @@ struct ntb_rx_info { }; struct ntb_transport_qp { - struct ntb_transport *transport; - struct ntb_device *ndev; + struct ntb_transport_ctx *transport; + struct ntb_dev *ndev; void *cb_data; struct dma_chan *dma_chan; bool client_ready; - bool qp_link; + bool link_is_up; + u8 qp_num; /* Only 64 QP's are allowed. 0-63 */ + u64 qp_bit; struct ntb_rx_info __iomem *rx_info; struct ntb_rx_info *remote_rx_info; @@ -127,6 +151,7 @@ struct ntb_transport_qp { unsigned int rx_max_entry; unsigned int rx_max_frame; dma_cookie_t last_cookie; + struct tasklet_struct rxc_db_work; void (*event_handler)(void *data, int status); struct delayed_work link_work; @@ -153,33 +178,44 @@ struct ntb_transport_qp { }; struct ntb_transport_mw { - size_t size; + phys_addr_t phys_addr; + resource_size_t phys_size; + resource_size_t xlat_align; + resource_size_t xlat_align_size; + void __iomem *vbase; + size_t xlat_size; + size_t buff_size; void *virt_addr; dma_addr_t dma_addr; }; struct ntb_transport_client_dev { struct list_head entry; + struct ntb_transport_ctx *nt; struct device dev; }; -struct ntb_transport { +struct ntb_transport_ctx { struct list_head entry; struct list_head client_devs; - struct ntb_device *ndev; - struct ntb_transport_mw *mw; - struct ntb_transport_qp *qps; - unsigned int max_qps; - unsigned long qp_bitmap; - bool transport_link; + struct ntb_dev *ndev; + + struct ntb_transport_mw *mw_vec; + struct ntb_transport_qp *qp_vec; + unsigned int mw_count; + unsigned int qp_count; + u64 qp_bitmap; + u64 qp_bitmap_free; + + bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; }; enum { - DESC_DONE_FLAG = 1 << 0, - LINK_DOWN_FLAG = 1 << 1, + DESC_DONE_FLAG = BIT(0), + LINK_DOWN_FLAG = BIT(1), }; struct ntb_payload_header { @@ -200,68 +236,69 @@ enum { MAX_SPAD, }; -#define QP_TO_MW(ndev, qp) ((qp) % ntb_max_mw(ndev)) +#define dev_client_dev(__dev) \ + container_of((__dev), struct ntb_transport_client_dev, dev) + +#define drv_client(__drv) \ + container_of((__drv), struct ntb_transport_client, driver) + +#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count) #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 -static int ntb_match_bus(struct device *dev, struct device_driver *drv) +static void ntb_transport_rxc_db(unsigned long data); +static const struct ntb_ctx_ops ntb_transport_ops; +static struct ntb_client ntb_transport_client; + +static int ntb_transport_bus_match(struct device *dev, + struct device_driver *drv) { return !strncmp(dev_name(dev), drv->name, strlen(drv->name)); } -static int ntb_client_probe(struct device *dev) +static int ntb_transport_bus_probe(struct device *dev) { - const struct ntb_client *drv = container_of(dev->driver, - struct ntb_client, driver); - struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev); + const struct ntb_transport_client *client; int rc = -EINVAL; get_device(dev); - if (drv && drv->probe) - rc = drv->probe(pdev); + + client = drv_client(dev->driver); + rc = client->probe(dev); if (rc) put_device(dev); return rc; } -static int ntb_client_remove(struct device *dev) +static int ntb_transport_bus_remove(struct device *dev) { - const struct ntb_client *drv = container_of(dev->driver, - struct ntb_client, driver); - struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev); + const struct ntb_transport_client *client; - if (drv && drv->remove) - drv->remove(pdev); + client = drv_client(dev->driver); + client->remove(dev); put_device(dev); return 0; } -static struct bus_type ntb_bus_type = { - .name = "ntb_bus", - .match = ntb_match_bus, - .probe = ntb_client_probe, - .remove = ntb_client_remove, +static struct bus_type ntb_transport_bus = { + .name = "ntb_transport", + .match = ntb_transport_bus_match, + .probe = ntb_transport_bus_probe, + .remove = ntb_transport_bus_remove, }; static LIST_HEAD(ntb_transport_list); -static int ntb_bus_init(struct ntb_transport *nt) +static int ntb_bus_init(struct ntb_transport_ctx *nt) { - if (list_empty(&ntb_transport_list)) { - int rc = bus_register(&ntb_bus_type); - if (rc) - return rc; - } - list_add(&nt->entry, &ntb_transport_list); - return 0; } -static void ntb_bus_remove(struct ntb_transport *nt) +static void ntb_bus_remove(struct ntb_transport_ctx *nt) { struct ntb_transport_client_dev *client_dev, *cd; @@ -273,29 +310,26 @@ static void ntb_bus_remove(struct ntb_transport *nt) } list_del(&nt->entry); - - if (list_empty(&ntb_transport_list)) - bus_unregister(&ntb_bus_type); } -static void ntb_client_release(struct device *dev) +static void ntb_transport_client_release(struct device *dev) { struct ntb_transport_client_dev *client_dev; - client_dev = container_of(dev, struct ntb_transport_client_dev, dev); + client_dev = dev_client_dev(dev); kfree(client_dev); } /** - * ntb_unregister_client_dev - Unregister NTB client device + * ntb_transport_unregister_client_dev - Unregister NTB client device * @device_name: Name of NTB client device * * Unregister an NTB client device with the NTB transport layer */ -void ntb_unregister_client_dev(char *device_name) +void ntb_transport_unregister_client_dev(char *device_name) { struct ntb_transport_client_dev *client, *cd; - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; list_for_each_entry(nt, &ntb_transport_list, entry) list_for_each_entry_safe(client, cd, &nt->client_devs, entry) @@ -305,18 +339,19 @@ void ntb_unregister_client_dev(char *device_name) device_unregister(&client->dev); } } -EXPORT_SYMBOL_GPL(ntb_unregister_client_dev); +EXPORT_SYMBOL_GPL(ntb_transport_unregister_client_dev); /** - * ntb_register_client_dev - Register NTB client device + * ntb_transport_register_client_dev - Register NTB client device * @device_name: Name of NTB client device * * Register an NTB client device with the NTB transport layer */ -int ntb_register_client_dev(char *device_name) +int ntb_transport_register_client_dev(char *device_name) { struct ntb_transport_client_dev *client_dev; - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; + int node; int rc, i = 0; if (list_empty(&ntb_transport_list)) @@ -325,8 +360,10 @@ int ntb_register_client_dev(char *device_name) list_for_each_entry(nt, &ntb_transport_list, entry) { struct device *dev; - client_dev = kzalloc(sizeof(struct ntb_transport_client_dev), - GFP_KERNEL); + node = dev_to_node(&nt->ndev->dev); + + client_dev = kzalloc_node(sizeof(*client_dev), + GFP_KERNEL, node); if (!client_dev) { rc = -ENOMEM; goto err; @@ -336,9 +373,9 @@ int ntb_register_client_dev(char *device_name) /* setup and register client devices */ dev_set_name(dev, "%s%d", device_name, i); - dev->bus = &ntb_bus_type; - dev->release = ntb_client_release; - dev->parent = &ntb_query_pdev(nt->ndev)->dev; + dev->bus = &ntb_transport_bus; + dev->release = ntb_transport_client_release; + dev->parent = &nt->ndev->dev; rc = device_register(dev); if (rc) { @@ -353,44 +390,44 @@ int ntb_register_client_dev(char *device_name) return 0; err: - ntb_unregister_client_dev(device_name); + ntb_transport_unregister_client_dev(device_name); return rc; } -EXPORT_SYMBOL_GPL(ntb_register_client_dev); +EXPORT_SYMBOL_GPL(ntb_transport_register_client_dev); /** - * ntb_register_client - Register NTB client driver + * ntb_transport_register_client - Register NTB client driver * @drv: NTB client driver to be registered * * Register an NTB client driver with the NTB transport layer * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -int ntb_register_client(struct ntb_client *drv) +int ntb_transport_register_client(struct ntb_transport_client *drv) { - drv->driver.bus = &ntb_bus_type; + drv->driver.bus = &ntb_transport_bus; if (list_empty(&ntb_transport_list)) return -ENODEV; return driver_register(&drv->driver); } -EXPORT_SYMBOL_GPL(ntb_register_client); +EXPORT_SYMBOL_GPL(ntb_transport_register_client); /** - * ntb_unregister_client - Unregister NTB client driver + * ntb_transport_unregister_client - Unregister NTB client driver * @drv: NTB client driver to be unregistered * * Unregister an NTB client driver with the NTB transport layer * * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ -void ntb_unregister_client(struct ntb_client *drv) +void ntb_transport_unregister_client(struct ntb_transport_client *drv) { driver_unregister(&drv->driver); } -EXPORT_SYMBOL_GPL(ntb_unregister_client); +EXPORT_SYMBOL_GPL(ntb_transport_unregister_client); static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) @@ -452,8 +489,8 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, "tx_max_entry - \t%u\n", qp->tx_max_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "\nQP Link %s\n", (qp->qp_link == NTB_LINK_UP) ? - "Up" : "Down"); + "\nQP Link %s\n", + qp->link_is_up ? "Up" : "Down"); if (out_offset > out_count) out_offset = out_count; @@ -497,26 +534,31 @@ out: return entry; } -static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, - unsigned int qp_num) +static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, + unsigned int qp_num) { - struct ntb_transport_qp *qp = &nt->qps[qp_num]; + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + struct ntb_transport_mw *mw; unsigned int rx_size, num_qps_mw; - u8 mw_num, mw_max; + unsigned int mw_num, mw_count, qp_count; unsigned int i; - mw_max = ntb_max_mw(nt->ndev); - mw_num = QP_TO_MW(nt->ndev, qp_num); + mw_count = nt->mw_count; + qp_count = nt->qp_count; + + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; - WARN_ON(nt->mw[mw_num].virt_addr == NULL); + if (!mw->virt_addr) + return -ENOMEM; - if (nt->max_qps % mw_max && mw_num + 1 < nt->max_qps / mw_max) - num_qps_mw = nt->max_qps / mw_max + 1; + if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + num_qps_mw = qp_count / mw_count + 1; else - num_qps_mw = nt->max_qps / mw_max; + num_qps_mw = qp_count / mw_count; - rx_size = (unsigned int) nt->mw[mw_num].size / num_qps_mw; - qp->rx_buff = nt->mw[mw_num].virt_addr + qp_num / mw_max * rx_size; + rx_size = (unsigned int)mw->xlat_size / num_qps_mw; + qp->rx_buff = mw->virt_addr + rx_size * qp_num / mw_count; rx_size -= sizeof(struct ntb_rx_info); qp->remote_rx_info = qp->rx_buff + rx_size; @@ -530,49 +572,63 @@ static void ntb_transport_setup_qp_mw(struct ntb_transport *nt, /* setup the hdr offsets with 0's */ for (i = 0; i < qp->rx_max_entry; i++) { - void *offset = qp->rx_buff + qp->rx_max_frame * (i + 1) - - sizeof(struct ntb_payload_header); + void *offset = (qp->rx_buff + qp->rx_max_frame * (i + 1) - + sizeof(struct ntb_payload_header)); memset(offset, 0, sizeof(struct ntb_payload_header)); } qp->rx_pkts = 0; qp->tx_pkts = 0; qp->tx_index = 0; + + return 0; } -static void ntb_free_mw(struct ntb_transport *nt, int num_mw) +static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { - struct ntb_transport_mw *mw = &nt->mw[num_mw]; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + struct pci_dev *pdev = nt->ndev->pdev; if (!mw->virt_addr) return; - dma_free_coherent(&pdev->dev, mw->size, mw->virt_addr, mw->dma_addr); + ntb_mw_clear_trans(nt->ndev, num_mw); + dma_free_coherent(&pdev->dev, mw->buff_size, + mw->virt_addr, mw->dma_addr); + mw->xlat_size = 0; + mw->buff_size = 0; mw->virt_addr = NULL; } -static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size) +static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, + unsigned int size) { - struct ntb_transport_mw *mw = &nt->mw[num_mw]; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; + struct pci_dev *pdev = nt->ndev->pdev; + unsigned int xlat_size, buff_size; + int rc; + + xlat_size = round_up(size, mw->xlat_align_size); + buff_size = round_up(size, mw->xlat_align); /* No need to re-setup */ - if (mw->size == ALIGN(size, 4096)) + if (mw->xlat_size == xlat_size) return 0; - if (mw->size != 0) + if (mw->buff_size) ntb_free_mw(nt, num_mw); - /* Alloc memory for receiving data. Must be 4k aligned */ - mw->size = ALIGN(size, 4096); + /* Alloc memory for receiving data. Must be aligned */ + mw->xlat_size = xlat_size; + mw->buff_size = buff_size; - mw->virt_addr = dma_alloc_coherent(&pdev->dev, mw->size, &mw->dma_addr, - GFP_KERNEL); + mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, + &mw->dma_addr, GFP_KERNEL); if (!mw->virt_addr) { - mw->size = 0; - dev_err(&pdev->dev, "Unable to allocate MW buffer of size %d\n", - (int) mw->size); + mw->xlat_size = 0; + mw->buff_size = 0; + dev_err(&pdev->dev, "Unable to alloc MW buff of size %d\n", + buff_size); return -ENOMEM; } @@ -582,34 +638,58 @@ static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size) * is a requirement of the hardware. It is recommended to setup CMA * for BAR sizes equal or greater than 4MB. */ - if (!IS_ALIGNED(mw->dma_addr, mw->size)) { - dev_err(&pdev->dev, "DMA memory %pad not aligned to BAR size\n", + if (!IS_ALIGNED(mw->dma_addr, mw->xlat_align)) { + dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", &mw->dma_addr); ntb_free_mw(nt, num_mw); return -ENOMEM; } /* Notify HW the memory location of the receive buffer */ - ntb_set_mw_addr(nt->ndev, num_mw, mw->dma_addr); + rc = ntb_mw_set_trans(nt->ndev, num_mw, mw->dma_addr, mw->xlat_size); + if (rc) { + dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw); + ntb_free_mw(nt, num_mw); + return -EIO; + } return 0; } +static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) +{ + qp->link_is_up = false; + + qp->tx_index = 0; + qp->rx_index = 0; + qp->rx_bytes = 0; + qp->rx_pkts = 0; + qp->rx_ring_empty = 0; + qp->rx_err_no_buf = 0; + qp->rx_err_oflow = 0; + qp->rx_err_ver = 0; + qp->rx_memcpy = 0; + qp->rx_async = 0; + qp->tx_bytes = 0; + qp->tx_pkts = 0; + qp->tx_ring_full = 0; + qp->tx_err_no_buf = 0; + qp->tx_memcpy = 0; + qp->tx_async = 0; +} + static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) { - struct ntb_transport *nt = qp->transport; - struct pci_dev *pdev = ntb_query_pdev(nt->ndev); + struct ntb_transport_ctx *nt = qp->transport; + struct pci_dev *pdev = nt->ndev->pdev; - if (qp->qp_link == NTB_LINK_DOWN) { - cancel_delayed_work_sync(&qp->link_work); - return; - } + dev_info(&pdev->dev, "qp %d: Link Cleanup\n", qp->qp_num); - if (qp->event_handler) - qp->event_handler(qp->cb_data, NTB_LINK_DOWN); + cancel_delayed_work_sync(&qp->link_work); + ntb_qp_link_down_reset(qp); - dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); - qp->qp_link = NTB_LINK_DOWN; + if (qp->event_handler) + qp->event_handler(qp->cb_data, qp->link_is_up); } static void ntb_qp_link_cleanup_work(struct work_struct *work) @@ -617,11 +697,11 @@ static void ntb_qp_link_cleanup_work(struct work_struct *work) struct ntb_transport_qp *qp = container_of(work, struct ntb_transport_qp, link_cleanup); - struct ntb_transport *nt = qp->transport; + struct ntb_transport_ctx *nt = qp->transport; ntb_qp_link_cleanup(qp); - if (nt->transport_link == NTB_LINK_UP) + if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } @@ -631,180 +711,132 @@ static void ntb_qp_link_down(struct ntb_transport_qp *qp) schedule_work(&qp->link_cleanup); } -static void ntb_transport_link_cleanup(struct ntb_transport *nt) +static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt) { + struct ntb_transport_qp *qp; + u64 qp_bitmap_alloc; int i; + qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; + /* Pass along the info to any clients */ - for (i = 0; i < nt->max_qps; i++) - if (!test_bit(i, &nt->qp_bitmap)) - ntb_qp_link_cleanup(&nt->qps[i]); + for (i = 0; i < nt->qp_count; i++) + if (qp_bitmap_alloc & BIT_ULL(i)) { + qp = &nt->qp_vec[i]; + ntb_qp_link_cleanup(qp); + cancel_work_sync(&qp->link_cleanup); + cancel_delayed_work_sync(&qp->link_work); + } - if (nt->transport_link == NTB_LINK_DOWN) + if (!nt->link_is_up) cancel_delayed_work_sync(&nt->link_work); - else - nt->transport_link = NTB_LINK_DOWN; /* The scratchpad registers keep the values if the remote side * goes down, blast them now to give them a sane value the next * time they are accessed */ for (i = 0; i < MAX_SPAD; i++) - ntb_write_local_spad(nt->ndev, i, 0); + ntb_spad_write(nt->ndev, i, 0); } static void ntb_transport_link_cleanup_work(struct work_struct *work) { - struct ntb_transport *nt = container_of(work, struct ntb_transport, - link_cleanup); + struct ntb_transport_ctx *nt = + container_of(work, struct ntb_transport_ctx, link_cleanup); ntb_transport_link_cleanup(nt); } -static void ntb_transport_event_callback(void *data, enum ntb_hw_event event) +static void ntb_transport_event_callback(void *data) { - struct ntb_transport *nt = data; + struct ntb_transport_ctx *nt = data; - switch (event) { - case NTB_EVENT_HW_LINK_UP: + if (ntb_link_is_up(nt->ndev, NULL, NULL) == 1) schedule_delayed_work(&nt->link_work, 0); - break; - case NTB_EVENT_HW_LINK_DOWN: + else schedule_work(&nt->link_cleanup); - break; - default: - BUG(); - } } static void ntb_transport_link_work(struct work_struct *work) { - struct ntb_transport *nt = container_of(work, struct ntb_transport, - link_work.work); - struct ntb_device *ndev = nt->ndev; - struct pci_dev *pdev = ntb_query_pdev(ndev); + struct ntb_transport_ctx *nt = + container_of(work, struct ntb_transport_ctx, link_work.work); + struct ntb_dev *ndev = nt->ndev; + struct pci_dev *pdev = ndev->pdev; + resource_size_t size; u32 val; - int rc, i; + int rc, i, spad; /* send the local info, in the opposite order of the way we read it */ - for (i = 0; i < ntb_max_mw(ndev); i++) { - rc = ntb_write_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), - ntb_get_mw_size(ndev, i) >> 32); - if (rc) { - dev_err(&pdev->dev, "Error writing %u to remote spad %d\n", - (u32)(ntb_get_mw_size(ndev, i) >> 32), - MW0_SZ_HIGH + (i * 2)); - goto out; - } + for (i = 0; i < nt->mw_count; i++) { + size = nt->mw_vec[i].phys_size; - rc = ntb_write_remote_spad(ndev, MW0_SZ_LOW + (i * 2), - (u32) ntb_get_mw_size(ndev, i)); - if (rc) { - dev_err(&pdev->dev, "Error writing %u to remote spad %d\n", - (u32) ntb_get_mw_size(ndev, i), - MW0_SZ_LOW + (i * 2)); - goto out; - } - } + if (max_mw_size && size > max_mw_size) + size = max_mw_size; - rc = ntb_write_remote_spad(ndev, NUM_MWS, ntb_max_mw(ndev)); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - ntb_max_mw(ndev), NUM_MWS); - goto out; - } + spad = MW0_SZ_HIGH + (i * 2); + ntb_peer_spad_write(ndev, spad, (u32)(size >> 32)); - rc = ntb_write_remote_spad(ndev, NUM_QPS, nt->max_qps); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - nt->max_qps, NUM_QPS); - goto out; + spad = MW0_SZ_LOW + (i * 2); + ntb_peer_spad_write(ndev, spad, (u32)size); } - rc = ntb_write_remote_spad(ndev, VERSION, NTB_TRANSPORT_VERSION); - if (rc) { - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - NTB_TRANSPORT_VERSION, VERSION); - goto out; - } + ntb_peer_spad_write(ndev, NUM_MWS, nt->mw_count); - /* Query the remote side for its info */ - rc = ntb_read_remote_spad(ndev, VERSION, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", VERSION); - goto out; - } + ntb_peer_spad_write(ndev, NUM_QPS, nt->qp_count); - if (val != NTB_TRANSPORT_VERSION) - goto out; - dev_dbg(&pdev->dev, "Remote version = %d\n", val); + ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION); - rc = ntb_read_remote_spad(ndev, NUM_QPS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_QPS); + /* Query the remote side for its info */ + val = ntb_spad_read(ndev, VERSION); + dev_dbg(&pdev->dev, "Remote version = %d\n", val); + if (val != NTB_TRANSPORT_VERSION) goto out; - } - if (val != nt->max_qps) - goto out; + val = ntb_spad_read(ndev, NUM_QPS); dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val); - - rc = ntb_read_remote_spad(ndev, NUM_MWS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_MWS); + if (val != nt->qp_count) goto out; - } - if (val != ntb_max_mw(ndev)) - goto out; + val = ntb_spad_read(ndev, NUM_MWS); dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val); + if (val != nt->mw_count) + goto out; - for (i = 0; i < ntb_max_mw(ndev); i++) { + for (i = 0; i < nt->mw_count; i++) { u64 val64; - rc = ntb_read_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", - MW0_SZ_HIGH + (i * 2)); - goto out1; - } - - val64 = (u64) val << 32; - - rc = ntb_read_remote_spad(ndev, MW0_SZ_LOW + (i * 2), &val); - if (rc) { - dev_err(&pdev->dev, "Error reading remote spad %d\n", - MW0_SZ_LOW + (i * 2)); - goto out1; - } + val = ntb_spad_read(ndev, MW0_SZ_HIGH + (i * 2)); + val64 = (u64)val << 32; + val = ntb_spad_read(ndev, MW0_SZ_LOW + (i * 2)); val64 |= val; - dev_dbg(&pdev->dev, "Remote MW%d size = %llu\n", i, val64); + dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64); rc = ntb_set_mw(nt, i, val64); if (rc) goto out1; } - nt->transport_link = NTB_LINK_UP; + nt->link_is_up = true; - for (i = 0; i < nt->max_qps; i++) { - struct ntb_transport_qp *qp = &nt->qps[i]; + for (i = 0; i < nt->qp_count; i++) { + struct ntb_transport_qp *qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); - if (qp->client_ready == NTB_LINK_UP) + if (qp->client_ready) schedule_delayed_work(&qp->link_work, 0); } return; out1: - for (i = 0; i < ntb_max_mw(ndev); i++) + for (i = 0; i < nt->mw_count; i++) ntb_free_mw(nt, i); out: - if (ntb_hw_link_status(ndev)) + if (ntb_link_is_up(ndev, NULL, NULL) == 1) schedule_delayed_work(&nt->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } @@ -814,73 +846,73 @@ static void ntb_qp_link_work(struct work_struct *work) struct ntb_transport_qp *qp = container_of(work, struct ntb_transport_qp, link_work.work); - struct pci_dev *pdev = ntb_query_pdev(qp->ndev); - struct ntb_transport *nt = qp->transport; - int rc, val; + struct pci_dev *pdev = qp->ndev->pdev; + struct ntb_transport_ctx *nt = qp->transport; + int val; - WARN_ON(nt->transport_link != NTB_LINK_UP); + WARN_ON(!nt->link_is_up); - rc = ntb_read_local_spad(nt->ndev, QP_LINKS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS); - return; - } + val = ntb_spad_read(nt->ndev, QP_LINKS); - rc = ntb_write_remote_spad(nt->ndev, QP_LINKS, val | 1 << qp->qp_num); - if (rc) - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - val | 1 << qp->qp_num, QP_LINKS); + ntb_peer_spad_write(nt->ndev, QP_LINKS, val | BIT(qp->qp_num)); /* query remote spad for qp ready bits */ - rc = ntb_read_remote_spad(nt->ndev, QP_LINKS, &val); - if (rc) - dev_err(&pdev->dev, "Error reading remote spad %d\n", QP_LINKS); - - dev_dbg(&pdev->dev, "Remote QP link status = %x\n", val); + ntb_peer_spad_read(nt->ndev, QP_LINKS); + dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val); /* See if the remote side is up */ - if (1 << qp->qp_num & val) { - qp->qp_link = NTB_LINK_UP; - + if (val & BIT(qp->qp_num)) { dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num); + qp->link_is_up = true; + if (qp->event_handler) - qp->event_handler(qp->cb_data, NTB_LINK_UP); - } else if (nt->transport_link == NTB_LINK_UP) + qp->event_handler(qp->cb_data, qp->link_is_up); + } else if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); } -static int ntb_transport_init_queue(struct ntb_transport *nt, +static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num) { struct ntb_transport_qp *qp; + struct ntb_transport_mw *mw; + phys_addr_t mw_base; + resource_size_t mw_size; unsigned int num_qps_mw, tx_size; - u8 mw_num, mw_max; + unsigned int mw_num, mw_count, qp_count; u64 qp_offset; - mw_max = ntb_max_mw(nt->ndev); - mw_num = QP_TO_MW(nt->ndev, qp_num); + mw_count = nt->mw_count; + qp_count = nt->qp_count; - qp = &nt->qps[qp_num]; + mw_num = QP_TO_MW(nt, qp_num); + mw = &nt->mw_vec[mw_num]; + + qp = &nt->qp_vec[qp_num]; qp->qp_num = qp_num; qp->transport = nt; qp->ndev = nt->ndev; - qp->qp_link = NTB_LINK_DOWN; - qp->client_ready = NTB_LINK_DOWN; + qp->client_ready = false; qp->event_handler = NULL; + ntb_qp_link_down_reset(qp); - if (nt->max_qps % mw_max && mw_num + 1 < nt->max_qps / mw_max) - num_qps_mw = nt->max_qps / mw_max + 1; + if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count) + num_qps_mw = qp_count / mw_count + 1; else - num_qps_mw = nt->max_qps / mw_max; + num_qps_mw = qp_count / mw_count; + + mw_base = nt->mw_vec[mw_num].phys_addr; + mw_size = nt->mw_vec[mw_num].phys_size; - tx_size = (unsigned int) ntb_get_mw_size(qp->ndev, mw_num) / num_qps_mw; - qp_offset = qp_num / mw_max * tx_size; - qp->tx_mw = ntb_get_mw_vbase(nt->ndev, mw_num) + qp_offset; + tx_size = (unsigned int)mw_size / num_qps_mw; + qp_offset = tx_size * qp_num / mw_count; + + qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; if (!qp->tx_mw) return -EINVAL; - qp->tx_mw_phys = ntb_get_mw_base(qp->ndev, mw_num) + qp_offset; + qp->tx_mw_phys = mw_base + qp_offset; if (!qp->tx_mw_phys) return -EINVAL; @@ -891,16 +923,19 @@ static int ntb_transport_init_queue(struct ntb_transport *nt, qp->tx_max_frame = min(transport_mtu, tx_size / 2); qp->tx_max_entry = tx_size / qp->tx_max_frame; - if (ntb_query_debugfs(nt->ndev)) { + if (nt_debugfs_dir) { char debugfs_name[4]; snprintf(debugfs_name, 4, "qp%d", qp_num); qp->debugfs_dir = debugfs_create_dir(debugfs_name, - ntb_query_debugfs(nt->ndev)); + nt_debugfs_dir); qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, qp->debugfs_dir, qp, &ntb_qp_debugfs_stats); + } else { + qp->debugfs_dir = NULL; + qp->debugfs_stats = NULL; } INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work); @@ -914,46 +949,89 @@ static int ntb_transport_init_queue(struct ntb_transport *nt, INIT_LIST_HEAD(&qp->rx_free_q); INIT_LIST_HEAD(&qp->tx_free_q); + tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db, + (unsigned long)qp); + return 0; } -int ntb_transport_init(struct pci_dev *pdev) +static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) { - struct ntb_transport *nt; + struct ntb_transport_ctx *nt; + struct ntb_transport_mw *mw; + unsigned int mw_count, qp_count; + u64 qp_bitmap; + int node; int rc, i; - nt = kzalloc(sizeof(struct ntb_transport), GFP_KERNEL); + if (ntb_db_is_unsafe(ndev)) + dev_dbg(&ndev->dev, + "doorbell is unsafe, proceed anyway...\n"); + if (ntb_spad_is_unsafe(ndev)) + dev_dbg(&ndev->dev, + "scratchpad is unsafe, proceed anyway...\n"); + + node = dev_to_node(&ndev->dev); + + nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node); if (!nt) return -ENOMEM; - nt->ndev = ntb_register_transport(pdev, nt); - if (!nt->ndev) { - rc = -EIO; + nt->ndev = ndev; + + mw_count = ntb_mw_count(ndev); + + nt->mw_count = mw_count; + + nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), + GFP_KERNEL, node); + if (!nt->mw_vec) { + rc = -ENOMEM; goto err; } - nt->mw = kcalloc(ntb_max_mw(nt->ndev), sizeof(struct ntb_transport_mw), - GFP_KERNEL); - if (!nt->mw) { - rc = -ENOMEM; - goto err1; + for (i = 0; i < mw_count; i++) { + mw = &nt->mw_vec[i]; + + rc = ntb_mw_get_range(ndev, i, &mw->phys_addr, &mw->phys_size, + &mw->xlat_align, &mw->xlat_align_size); + if (rc) + goto err1; + + mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size); + if (!mw->vbase) { + rc = -ENOMEM; + goto err1; + } + + mw->buff_size = 0; + mw->xlat_size = 0; + mw->virt_addr = NULL; + mw->dma_addr = 0; } - if (max_num_clients) - nt->max_qps = min(ntb_max_cbs(nt->ndev), max_num_clients); - else - nt->max_qps = min(ntb_max_cbs(nt->ndev), ntb_max_mw(nt->ndev)); + qp_bitmap = ntb_db_valid_mask(ndev); + + qp_count = ilog2(qp_bitmap); + if (max_num_clients && max_num_clients < qp_count) + qp_count = max_num_clients; + else if (mw_count < qp_count) + qp_count = mw_count; + + qp_bitmap &= BIT_ULL(qp_count) - 1; - nt->qps = kcalloc(nt->max_qps, sizeof(struct ntb_transport_qp), - GFP_KERNEL); - if (!nt->qps) { + nt->qp_count = qp_count; + nt->qp_bitmap = qp_bitmap; + nt->qp_bitmap_free = qp_bitmap; + + nt->qp_vec = kzalloc_node(qp_count * sizeof(*nt->qp_vec), + GFP_KERNEL, node); + if (!nt->qp_vec) { rc = -ENOMEM; goto err2; } - nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1; - - for (i = 0; i < nt->max_qps; i++) { + for (i = 0; i < qp_count; i++) { rc = ntb_transport_init_queue(nt, i); if (rc) goto err3; @@ -962,8 +1040,7 @@ int ntb_transport_init(struct pci_dev *pdev) INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work); INIT_WORK(&nt->link_cleanup, ntb_transport_link_cleanup_work); - rc = ntb_register_event_callback(nt->ndev, - ntb_transport_event_callback); + rc = ntb_set_ctx(ndev, nt, &ntb_transport_ops); if (rc) goto err3; @@ -972,51 +1049,61 @@ int ntb_transport_init(struct pci_dev *pdev) if (rc) goto err4; - if (ntb_hw_link_status(nt->ndev)) - schedule_delayed_work(&nt->link_work, 0); + nt->link_is_up = false; + ntb_link_enable(ndev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ndev); return 0; err4: - ntb_unregister_event_callback(nt->ndev); + ntb_clear_ctx(ndev); err3: - kfree(nt->qps); + kfree(nt->qp_vec); err2: - kfree(nt->mw); + kfree(nt->mw_vec); err1: - ntb_unregister_transport(nt->ndev); + while (i--) { + mw = &nt->mw_vec[i]; + iounmap(mw->vbase); + } err: kfree(nt); return rc; } -void ntb_transport_free(void *transport) +static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev) { - struct ntb_transport *nt = transport; - struct ntb_device *ndev = nt->ndev; + struct ntb_transport_ctx *nt = ndev->ctx; + struct ntb_transport_qp *qp; + u64 qp_bitmap_alloc; int i; ntb_transport_link_cleanup(nt); + cancel_work_sync(&nt->link_cleanup); + cancel_delayed_work_sync(&nt->link_work); + + qp_bitmap_alloc = nt->qp_bitmap & ~nt->qp_bitmap_free; /* verify that all the qp's are freed */ - for (i = 0; i < nt->max_qps; i++) { - if (!test_bit(i, &nt->qp_bitmap)) - ntb_transport_free_queue(&nt->qps[i]); - debugfs_remove_recursive(nt->qps[i].debugfs_dir); + for (i = 0; i < nt->qp_count; i++) { + qp = &nt->qp_vec[i]; + if (qp_bitmap_alloc & BIT_ULL(i)) + ntb_transport_free_queue(qp); + debugfs_remove_recursive(qp->debugfs_dir); } - ntb_bus_remove(nt); + ntb_link_disable(ndev); + ntb_clear_ctx(ndev); - cancel_delayed_work_sync(&nt->link_work); - - ntb_unregister_event_callback(ndev); + ntb_bus_remove(nt); - for (i = 0; i < ntb_max_mw(ndev); i++) + for (i = nt->mw_count; i--; ) { ntb_free_mw(nt, i); + iounmap(nt->mw_vec[i].vbase); + } - kfree(nt->qps); - kfree(nt->mw); - ntb_unregister_transport(ndev); + kfree(nt->qp_vec); + kfree(nt->mw_vec); kfree(nt); } @@ -1028,15 +1115,13 @@ static void ntb_rx_copy_callback(void *data) unsigned int len = entry->len; struct ntb_payload_header *hdr = entry->rx_hdr; - /* Ensure that the data is fully copied out before clearing the flag */ - wmb(); hdr->flags = 0; iowrite32(entry->index, &qp->rx_info->entry); ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); - if (qp->rx_handler && qp->client_ready == NTB_LINK_UP) + if (qp->rx_handler && qp->client_ready) qp->rx_handler(qp, qp->cb_data, cb_data, len); } @@ -1047,6 +1132,9 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) memcpy(buf, offset, len); + /* Ensure that the data is fully copied out before clearing the flag */ + wmb(); + ntb_rx_copy_callback(entry); } @@ -1071,8 +1159,8 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, goto err_wait; device = chan->device; - pay_off = (size_t) offset & ~PAGE_MASK; - buff_off = (size_t) buf & ~PAGE_MASK; + pay_off = (size_t)offset & ~PAGE_MASK; + buff_off = (size_t)buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) goto err_wait; @@ -1138,86 +1226,103 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) struct ntb_payload_header *hdr; struct ntb_queue_entry *entry; void *offset; + int rc; offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header); - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); - if (!entry) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "no buffer - HDR ver %u, len %d, flags %x\n", - hdr->ver, hdr->len, hdr->flags); - qp->rx_err_no_buf++; - return -ENOMEM; - } + dev_dbg(&qp->ndev->pdev->dev, "qp %d: RX ver %u len %d flags %x\n", + qp->qp_num, hdr->ver, hdr->len, hdr->flags); if (!(hdr->flags & DESC_DONE_FLAG)) { - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, - &qp->rx_pend_q); + dev_dbg(&qp->ndev->pdev->dev, "done flag not set\n"); qp->rx_ring_empty++; return -EAGAIN; } - if (hdr->ver != (u32) qp->rx_pkts) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "qp %d: version mismatch, expected %llu - got %u\n", - qp->qp_num, qp->rx_pkts, hdr->ver); - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, - &qp->rx_pend_q); + if (hdr->flags & LINK_DOWN_FLAG) { + dev_dbg(&qp->ndev->pdev->dev, "link down flag set\n"); + ntb_qp_link_down(qp); + hdr->flags = 0; + return -EAGAIN; + } + + if (hdr->ver != (u32)qp->rx_pkts) { + dev_dbg(&qp->ndev->pdev->dev, + "version mismatch, expected %llu - got %u\n", + qp->rx_pkts, hdr->ver); qp->rx_err_ver++; return -EIO; } - if (hdr->flags & LINK_DOWN_FLAG) { - ntb_qp_link_down(qp); + entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + if (!entry) { + dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n"); + qp->rx_err_no_buf++; + rc = -ENOMEM; goto err; } - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "rx offset %u, ver %u - %d payload received, buf size %d\n", - qp->rx_index, hdr->ver, hdr->len, entry->len); - - qp->rx_bytes += hdr->len; - qp->rx_pkts++; - if (hdr->len > entry->len) { - qp->rx_err_oflow++; - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, - "RX overflow! Wanted %d got %d\n", + dev_dbg(&qp->ndev->pdev->dev, + "receive buffer overflow! Wanted %d got %d\n", hdr->len, entry->len); + qp->rx_err_oflow++; + rc = -EIO; goto err; } + dev_dbg(&qp->ndev->pdev->dev, + "RX OK index %u ver %u size %d into buf size %d\n", + qp->rx_index, hdr->ver, hdr->len, entry->len); + + qp->rx_bytes += hdr->len; + qp->rx_pkts++; + entry->index = qp->rx_index; entry->rx_hdr = hdr; ntb_async_rx(entry, offset, hdr->len); -out: qp->rx_index++; qp->rx_index %= qp->rx_max_entry; return 0; err: - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q); - /* Ensure that the data is fully copied out before clearing the flag */ - wmb(); + /* FIXME: if this syncrhonous update of the rx_index gets ahead of + * asyncrhonous ntb_rx_copy_callback of previous entry, there are three + * scenarios: + * + * 1) The peer might miss this update, but observe the update + * from the memcpy completion callback. In this case, the buffer will + * not be freed on the peer to be reused for a different packet. The + * successful rx of a later packet would clear the condition, but the + * condition could persist if several rx fail in a row. + * + * 2) The peer may observe this update before the asyncrhonous copy of + * prior packets is completed. The peer may overwrite the buffers of + * the prior packets before they are copied. + * + * 3) Both: the peer may observe the update, and then observe the index + * decrement by the asynchronous completion callback. Who knows what + * badness that will cause. + */ hdr->flags = 0; iowrite32(qp->rx_index, &qp->rx_info->entry); - goto out; + return rc; } -static int ntb_transport_rxc_db(void *data, int db_num) +static void ntb_transport_rxc_db(unsigned long data) { - struct ntb_transport_qp *qp = data; + struct ntb_transport_qp *qp = (void *)data; int rc, i; - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%s: doorbell %d received\n", - __func__, db_num); + dev_dbg(&qp->ndev->pdev->dev, "%s: doorbell %d received\n", + __func__, qp->qp_num); /* Limit the number of packets processed in a single interrupt to * provide fairness to others @@ -1231,7 +1336,21 @@ static int ntb_transport_rxc_db(void *data, int db_num) if (qp->dma_chan) dma_async_issue_pending(qp->dma_chan); - return i; + if (i == qp->rx_max_entry) { + /* there is more work to do */ + tasklet_schedule(&qp->rxc_db_work); + } else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) { + /* the doorbell bit is set: clear it */ + ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num)); + /* ntb_db_read ensures ntb_db_clear write is committed */ + ntb_db_read(qp->ndev); + + /* an interrupt may have arrived between finishing + * ntb_process_rxc and clearing the doorbell bit: + * there might be some more work to do. + */ + tasklet_schedule(&qp->rxc_db_work); + } } static void ntb_tx_copy_callback(void *data) @@ -1240,11 +1359,9 @@ static void ntb_tx_copy_callback(void *data) struct ntb_transport_qp *qp = entry->qp; struct ntb_payload_header __iomem *hdr = entry->tx_hdr; - /* Ensure that the data is fully copied out before setting the flags */ - wmb(); iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); - ntb_ring_doorbell(qp->ndev, qp->qp_num); + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); /* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these @@ -1263,7 +1380,18 @@ static void ntb_tx_copy_callback(void *data) static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) { +#ifdef ARCH_HAS_NOCACHE_UACCESS + /* + * Using non-temporal mov to improve performance on non-cached + * writes, even though we aren't actually copying from user space. + */ + __copy_from_user_inatomic_nocache(offset, entry->buf, entry->len); +#else memcpy_toio(offset, entry->buf, entry->len); +#endif + + /* Ensure that the data is fully copied out before setting the flags */ + wmb(); ntb_tx_copy_callback(entry); } @@ -1288,7 +1416,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, entry->tx_hdr = hdr; iowrite32(entry->len, &hdr->len); - iowrite32((u32) qp->tx_pkts, &hdr->ver); + iowrite32((u32)qp->tx_pkts, &hdr->ver); if (!chan) goto err; @@ -1298,8 +1426,8 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, device = chan->device; dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index; - buff_off = (size_t) buf & ~PAGE_MASK; - dest_off = (size_t) dest & ~PAGE_MASK; + buff_off = (size_t)buf & ~PAGE_MASK; + dest_off = (size_t)dest & ~PAGE_MASK; if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) goto err; @@ -1347,9 +1475,6 @@ err: static int ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry) { - dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - tx %u, entry len %d flags %x buff %p\n", - qp->tx_pkts, qp->tx_index, entry->len, entry->flags, - entry->buf); if (qp->tx_index == qp->remote_rx_info->entry) { qp->tx_ring_full++; return -EAGAIN; @@ -1376,15 +1501,14 @@ static int ntb_process_tx(struct ntb_transport_qp *qp, static void ntb_send_link_down(struct ntb_transport_qp *qp) { - struct pci_dev *pdev = ntb_query_pdev(qp->ndev); + struct pci_dev *pdev = qp->ndev->pdev; struct ntb_queue_entry *entry; int i, rc; - if (qp->qp_link == NTB_LINK_DOWN) + if (!qp->link_is_up) return; - qp->qp_link = NTB_LINK_DOWN; - dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num); + dev_info(&pdev->dev, "qp %d: Send Link Down\n", qp->qp_num); for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) { entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); @@ -1405,6 +1529,13 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) if (rc) dev_err(&pdev->dev, "ntb: QP%d unable to send linkdown msg\n", qp->qp_num); + + ntb_qp_link_down_reset(qp); +} + +static bool ntb_dma_filter_fn(struct dma_chan *chan, void *node) +{ + return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; } /** @@ -1422,18 +1553,25 @@ static void ntb_send_link_down(struct ntb_transport_qp *qp) * RETURNS: pointer to newly created ntb_queue, NULL on error. */ struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, +ntb_transport_create_queue(void *data, struct device *client_dev, const struct ntb_queue_handlers *handlers) { + struct ntb_dev *ndev; + struct pci_dev *pdev; + struct ntb_transport_ctx *nt; struct ntb_queue_entry *entry; struct ntb_transport_qp *qp; - struct ntb_transport *nt; + u64 qp_bit; unsigned int free_queue; - int rc, i; + dma_cap_mask_t dma_mask; + int node; + int i; - nt = ntb_find_transport(pdev); - if (!nt) - goto err; + ndev = dev_ntb(client_dev->parent); + pdev = ndev->pdev; + nt = ndev->ctx; + + node = dev_to_node(&ndev->dev); free_queue = ffs(nt->qp_bitmap); if (!free_queue) @@ -1442,23 +1580,31 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, /* decrement free_queue to make it zero based */ free_queue--; - clear_bit(free_queue, &nt->qp_bitmap); + qp = &nt->qp_vec[free_queue]; + qp_bit = BIT_ULL(qp->qp_num); + + nt->qp_bitmap_free &= ~qp_bit; - qp = &nt->qps[free_queue]; qp->cb_data = data; qp->rx_handler = handlers->rx_handler; qp->tx_handler = handlers->tx_handler; qp->event_handler = handlers->event_handler; - dmaengine_get(); - qp->dma_chan = dma_find_channel(DMA_MEMCPY); - if (!qp->dma_chan) { - dmaengine_put(); - dev_info(&pdev->dev, "Unable to allocate DMA channel, using CPU instead\n"); + dma_cap_zero(dma_mask); + dma_cap_set(DMA_MEMCPY, dma_mask); + + if (use_dma) { + qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, + (void *)(unsigned long)node); + if (!qp->dma_chan) + dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); + } else { + qp->dma_chan = NULL; } + dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err1; @@ -1468,7 +1614,7 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, } for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { - entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC); + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1477,10 +1623,8 @@ ntb_transport_create_queue(void *data, struct pci_dev *pdev, &qp->tx_free_q); } - rc = ntb_register_db_callback(qp->ndev, free_queue, qp, - ntb_transport_rxc_db); - if (rc) - goto err2; + ntb_db_clear(qp->ndev, qp_bit); + ntb_db_clear_mask(qp->ndev, qp_bit); dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num); @@ -1493,8 +1637,8 @@ err1: while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->dma_chan) - dmaengine_put(); - set_bit(free_queue, &nt->qp_bitmap); + dma_release_channel(qp->dma_chan); + nt->qp_bitmap_free |= qp_bit; err: return NULL; } @@ -1508,13 +1652,15 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue); */ void ntb_transport_free_queue(struct ntb_transport_qp *qp) { + struct ntb_transport_ctx *nt = qp->transport; struct pci_dev *pdev; struct ntb_queue_entry *entry; + u64 qp_bit; if (!qp) return; - pdev = ntb_query_pdev(qp->ndev); + pdev = qp->ndev->pdev; if (qp->dma_chan) { struct dma_chan *chan = qp->dma_chan; @@ -1528,13 +1674,21 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) */ dma_sync_wait(chan, qp->last_cookie); dmaengine_terminate_all(chan); - dmaengine_put(); + dma_release_channel(chan); } - ntb_unregister_db_callback(qp->ndev, qp->qp_num); + qp_bit = BIT_ULL(qp->qp_num); + + ntb_db_set_mask(qp->ndev, qp_bit); + tasklet_disable(&qp->rxc_db_work); cancel_delayed_work_sync(&qp->link_work); + qp->cb_data = NULL; + qp->rx_handler = NULL; + qp->tx_handler = NULL; + qp->event_handler = NULL; + while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) kfree(entry); @@ -1546,7 +1700,7 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); - set_bit(qp->qp_num, &qp->transport->qp_bitmap); + nt->qp_bitmap_free |= qp_bit; dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num); } @@ -1567,7 +1721,7 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len) struct ntb_queue_entry *entry; void *buf; - if (!qp || qp->client_ready == NTB_LINK_UP) + if (!qp || qp->client_ready) return NULL; entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); @@ -1636,7 +1790,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, struct ntb_queue_entry *entry; int rc; - if (!qp || qp->qp_link != NTB_LINK_UP || !len) + if (!qp || !qp->link_is_up || !len) return -EINVAL; entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); @@ -1670,9 +1824,9 @@ void ntb_transport_link_up(struct ntb_transport_qp *qp) if (!qp) return; - qp->client_ready = NTB_LINK_UP; + qp->client_ready = true; - if (qp->transport->transport_link == NTB_LINK_UP) + if (qp->transport->link_is_up) schedule_delayed_work(&qp->link_work, 0); } EXPORT_SYMBOL_GPL(ntb_transport_link_up); @@ -1688,27 +1842,20 @@ EXPORT_SYMBOL_GPL(ntb_transport_link_up); void ntb_transport_link_down(struct ntb_transport_qp *qp) { struct pci_dev *pdev; - int rc, val; + int val; if (!qp) return; - pdev = ntb_query_pdev(qp->ndev); - qp->client_ready = NTB_LINK_DOWN; + pdev = qp->ndev->pdev; + qp->client_ready = false; - rc = ntb_read_local_spad(qp->ndev, QP_LINKS, &val); - if (rc) { - dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS); - return; - } + val = ntb_spad_read(qp->ndev, QP_LINKS); - rc = ntb_write_remote_spad(qp->ndev, QP_LINKS, - val & ~(1 << qp->qp_num)); - if (rc) - dev_err(&pdev->dev, "Error writing %x to remote spad %d\n", - val & ~(1 << qp->qp_num), QP_LINKS); + ntb_peer_spad_write(qp->ndev, QP_LINKS, + val & ~BIT(qp->qp_num)); - if (qp->qp_link == NTB_LINK_UP) + if (qp->link_is_up) ntb_send_link_down(qp); else cancel_delayed_work_sync(&qp->link_work); @@ -1728,7 +1875,7 @@ bool ntb_transport_link_query(struct ntb_transport_qp *qp) if (!qp) return false; - return qp->qp_link == NTB_LINK_UP; + return qp->link_is_up; } EXPORT_SYMBOL_GPL(ntb_transport_link_query); @@ -1774,3 +1921,71 @@ unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) return max; } EXPORT_SYMBOL_GPL(ntb_transport_max_size); + +static void ntb_transport_doorbell_callback(void *data, int vector) +{ + struct ntb_transport_ctx *nt = data; + struct ntb_transport_qp *qp; + u64 db_bits; + unsigned int qp_num; + + db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & + ntb_db_vector_mask(nt->ndev, vector)); + + while (db_bits) { + qp_num = __ffs(db_bits); + qp = &nt->qp_vec[qp_num]; + + tasklet_schedule(&qp->rxc_db_work); + + db_bits &= ~BIT_ULL(qp_num); + } +} + +static const struct ntb_ctx_ops ntb_transport_ops = { + .link_event = ntb_transport_event_callback, + .db_event = ntb_transport_doorbell_callback, +}; + +static struct ntb_client ntb_transport_client = { + .ops = { + .probe = ntb_transport_probe, + .remove = ntb_transport_free, + }, +}; + +static int __init ntb_transport_init(void) +{ + int rc; + + pr_info("%s, version %s\n", NTB_TRANSPORT_DESC, NTB_TRANSPORT_VER); + + if (debugfs_initialized()) + nt_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = bus_register(&ntb_transport_bus); + if (rc) + goto err_bus; + + rc = ntb_register_client(&ntb_transport_client); + if (rc) + goto err_client; + + return 0; + +err_client: + bus_unregister(&ntb_transport_bus); +err_bus: + debugfs_remove_recursive(nt_debugfs_dir); + return rc; +} +module_init(ntb_transport_init); + +static void __exit ntb_transport_exit(void) +{ + debugfs_remove_recursive(nt_debugfs_dir); + + ntb_unregister_client(&ntb_transport_client); + bus_unregister(&ntb_transport_bus); +} +module_exit(ntb_transport_exit); diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig new file mode 100644 index 000000000000..01852f98a843 --- /dev/null +++ b/drivers/ntb/test/Kconfig @@ -0,0 +1,19 @@ +config NTB_PINGPONG + tristate "NTB Ping Pong Test Client" + help + This is a simple ping pong driver that exercises the scratchpads and + doorbells of the ntb hardware. This driver may be used to test that + your ntb hardware and drivers are functioning at a basic level. + + If unsure, say N. + +config NTB_TOOL + tristate "NTB Debugging Tool Test Client" + help + This is a simple debugging driver that enables the doorbell and + scratchpad registers to be read and written from the debugfs. This + enables more complicated debugging to be scripted from user space. + This driver may be used to test that your ntb hardware and drivers are + functioning at a basic level. + + If unsure, say N. diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile new file mode 100644 index 000000000000..0ea32a324b6c --- /dev/null +++ b/drivers/ntb/test/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o +obj-$(CONFIG_NTB_TOOL) += ntb_tool.o diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c new file mode 100644 index 000000000000..fe1600566981 --- /dev/null +++ b/drivers/ntb/test/ntb_pingpong.c @@ -0,0 +1,250 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Pingpong Linux driver + * + * Contact Information: + * Allen Hubbe <Allen.Hubbe@emc.com> + */ + +/* Note: load this module with option 'dyndbg=+p' */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <linux/dma-mapping.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include <linux/ntb.h> + +#define DRIVER_NAME "ntb_pingpong" +#define DRIVER_DESCRIPTION "PCIe NTB Simple Pingpong Client" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "24 March 2015" +#define DRIVER_AUTHOR "Allen Hubbe <Allen.Hubbe@emc.com>" + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static unsigned int unsafe; +module_param(unsafe, uint, 0644); +MODULE_PARM_DESC(unsafe, "Run even though ntb operations may be unsafe"); + +static unsigned int delay_ms = 1000; +module_param(delay_ms, uint, 0644); +MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); + +static unsigned long db_init = 0x7; +module_param(db_init, ulong, 0644); +MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer"); + +struct pp_ctx { + struct ntb_dev *ntb; + u64 db_bits; + /* synchronize access to db_bits by ping and pong */ + spinlock_t db_lock; + struct timer_list db_timer; + unsigned long db_delay; +}; + +static void pp_ping(unsigned long ctx) +{ + struct pp_ctx *pp = (void *)ctx; + unsigned long irqflags; + u64 db_bits, db_mask; + u32 spad_rd, spad_wr; + + spin_lock_irqsave(&pp->db_lock, irqflags); + { + db_mask = ntb_db_valid_mask(pp->ntb); + db_bits = ntb_db_read(pp->ntb); + + if (db_bits) { + dev_dbg(&pp->ntb->dev, + "Masked pongs %#llx\n", + db_bits); + ntb_db_clear(pp->ntb, db_bits); + } + + db_bits = ((pp->db_bits | db_bits) << 1) & db_mask; + + if (!db_bits) + db_bits = db_init; + + spad_rd = ntb_spad_read(pp->ntb, 0); + spad_wr = spad_rd + 1; + + dev_dbg(&pp->ntb->dev, + "Ping bits %#llx read %#x write %#x\n", + db_bits, spad_rd, spad_wr); + + ntb_peer_spad_write(pp->ntb, 0, spad_wr); + ntb_peer_db_set(pp->ntb, db_bits); + ntb_db_clear_mask(pp->ntb, db_mask); + + pp->db_bits = 0; + } + spin_unlock_irqrestore(&pp->db_lock, irqflags); +} + +static void pp_link_event(void *ctx) +{ + struct pp_ctx *pp = ctx; + + if (ntb_link_is_up(pp->ntb, NULL, NULL) == 1) { + dev_dbg(&pp->ntb->dev, "link is up\n"); + pp_ping((unsigned long)pp); + } else { + dev_dbg(&pp->ntb->dev, "link is down\n"); + del_timer(&pp->db_timer); + } +} + +static void pp_db_event(void *ctx, int vec) +{ + struct pp_ctx *pp = ctx; + u64 db_bits, db_mask; + unsigned long irqflags; + + spin_lock_irqsave(&pp->db_lock, irqflags); + { + db_mask = ntb_db_vector_mask(pp->ntb, vec); + db_bits = db_mask & ntb_db_read(pp->ntb); + ntb_db_set_mask(pp->ntb, db_mask); + ntb_db_clear(pp->ntb, db_bits); + + pp->db_bits |= db_bits; + + mod_timer(&pp->db_timer, jiffies + pp->db_delay); + + dev_dbg(&pp->ntb->dev, + "Pong vec %d bits %#llx\n", + vec, db_bits); + } + spin_unlock_irqrestore(&pp->db_lock, irqflags); +} + +static const struct ntb_ctx_ops pp_ops = { + .link_event = pp_link_event, + .db_event = pp_db_event, +}; + +static int pp_probe(struct ntb_client *client, + struct ntb_dev *ntb) +{ + struct pp_ctx *pp; + int rc; + + if (ntb_db_is_unsafe(ntb)) { + dev_dbg(&ntb->dev, "doorbell is unsafe\n"); + if (!unsafe) { + rc = -EINVAL; + goto err_pp; + } + } + + if (ntb_spad_is_unsafe(ntb)) { + dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); + if (!unsafe) { + rc = -EINVAL; + goto err_pp; + } + } + + pp = kmalloc(sizeof(*pp), GFP_KERNEL); + if (!pp) { + rc = -ENOMEM; + goto err_pp; + } + + pp->ntb = ntb; + pp->db_bits = 0; + spin_lock_init(&pp->db_lock); + setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); + pp->db_delay = msecs_to_jiffies(delay_ms); + + rc = ntb_set_ctx(ntb, pp, &pp_ops); + if (rc) + goto err_ctx; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ntb); + + return 0; + +err_ctx: + kfree(pp); +err_pp: + return rc; +} + +static void pp_remove(struct ntb_client *client, + struct ntb_dev *ntb) +{ + struct pp_ctx *pp = ntb->ctx; + + ntb_clear_ctx(ntb); + del_timer_sync(&pp->db_timer); + ntb_link_disable(ntb); + + kfree(pp); +} + +static struct ntb_client pp_client = { + .ops = { + .probe = pp_probe, + .remove = pp_remove, + }, +}; +module_ntb_client(pp_client); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c new file mode 100644 index 000000000000..6f5dc6ca673d --- /dev/null +++ b/drivers/ntb/test/ntb_tool.c @@ -0,0 +1,556 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Debugging Tool Linux driver + * + * Contact Information: + * Allen Hubbe <Allen.Hubbe@emc.com> + */ + +/* + * How to use this tool, by example. + * + * Assuming $DBG_DIR is something like: + * '/sys/kernel/debug/ntb_tool/0000:00:03.0' + * + * Eg: check if clearing the doorbell mask generates an interrupt. + * + * # Set the doorbell mask + * root@self# echo 's 1' > $DBG_DIR/mask + * + * # Ring the doorbell from the peer + * root@peer# echo 's 1' > $DBG_DIR/peer_db + * + * # Clear the doorbell mask + * root@self# echo 'c 1' > $DBG_DIR/mask + * + * Observe debugging output in dmesg or your console. You should see a + * doorbell event triggered by clearing the mask. If not, this may indicate an + * issue with the hardware that needs to be worked around in the driver. + * + * Eg: read and write scratchpad registers + * + * root@peer# echo '0 0x01010101 1 0x7f7f7f7f' > $DBG_DIR/peer_spad + * + * root@self# cat $DBG_DIR/spad + * + * Observe that spad 0 and 1 have the values set by the peer. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/ntb.h> + +#define DRIVER_NAME "ntb_tool" +#define DRIVER_DESCRIPTION "PCIe NTB Debugging Tool" + +#define DRIVER_LICENSE "Dual BSD/GPL" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "22 April 2015" +#define DRIVER_AUTHOR "Allen Hubbe <Allen.Hubbe@emc.com>" + +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); + +static struct dentry *tool_dbgfs; + +struct tool_ctx { + struct ntb_dev *ntb; + struct dentry *dbgfs; +}; + +#define SPAD_FNAME_SIZE 0x10 +#define INT_PTR(x) ((void *)(unsigned long)x) +#define PTR_INT(x) ((int)(unsigned long)x) + +#define TOOL_FOPS_RDWR(__name, __read, __write) \ + const struct file_operations __name = { \ + .owner = THIS_MODULE, \ + .open = simple_open, \ + .read = __read, \ + .write = __write, \ + } + +static void tool_link_event(void *ctx) +{ + struct tool_ctx *tc = ctx; + enum ntb_speed speed; + enum ntb_width width; + int up; + + up = ntb_link_is_up(tc->ntb, &speed, &width); + + dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", + up ? "up" : "down", speed, width); +} + +static void tool_db_event(void *ctx, int vec) +{ + struct tool_ctx *tc = ctx; + u64 db_bits, db_mask; + + db_mask = ntb_db_vector_mask(tc->ntb, vec); + db_bits = ntb_db_read(tc->ntb); + + dev_dbg(&tc->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", + vec, db_mask, db_bits); +} + +static const struct ntb_ctx_ops tool_ops = { + .link_event = tool_link_event, + .db_event = tool_db_event, +}; + +static ssize_t tool_dbfn_read(struct tool_ctx *tc, char __user *ubuf, + size_t size, loff_t *offp, + u64 (*db_read_fn)(struct ntb_dev *)) +{ + size_t buf_size; + char *buf; + ssize_t pos, rc; + + if (!db_read_fn) + return -EINVAL; + + buf_size = min_t(size_t, size, 0x20); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = scnprintf(buf, buf_size, "%#llx\n", + db_read_fn(tc->ntb)); + + rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); + + kfree(buf); + + return rc; +} + +static ssize_t tool_dbfn_write(struct tool_ctx *tc, + const char __user *ubuf, + size_t size, loff_t *offp, + int (*db_set_fn)(struct ntb_dev *, u64), + int (*db_clear_fn)(struct ntb_dev *, u64)) +{ + u64 db_bits; + char *buf, cmd; + ssize_t rc; + int n; + + buf = kmalloc(size + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = simple_write_to_buffer(buf, size, offp, ubuf, size); + if (rc < 0) { + kfree(buf); + return rc; + } + + buf[size] = 0; + + n = sscanf(buf, "%c %lli", &cmd, &db_bits); + + kfree(buf); + + if (n != 2) { + rc = -EINVAL; + } else if (cmd == 's') { + if (!db_set_fn) + rc = -EINVAL; + else + rc = db_set_fn(tc->ntb, db_bits); + } else if (cmd == 'c') { + if (!db_clear_fn) + rc = -EINVAL; + else + rc = db_clear_fn(tc->ntb, db_bits); + } else { + rc = -EINVAL; + } + + return rc ? : size; +} + +static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, + size_t size, loff_t *offp, + u32 (*spad_read_fn)(struct ntb_dev *, int)) +{ + size_t buf_size; + char *buf; + ssize_t pos, rc; + int i, spad_count; + + if (!spad_read_fn) + return -EINVAL; + + buf_size = min_t(size_t, size, 0x100); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = 0; + + spad_count = ntb_spad_count(tc->ntb); + for (i = 0; i < spad_count; ++i) { + pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", + i, spad_read_fn(tc->ntb, i)); + } + + rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); + + kfree(buf); + + return rc; +} + +static ssize_t tool_spadfn_write(struct tool_ctx *tc, + const char __user *ubuf, + size_t size, loff_t *offp, + int (*spad_write_fn)(struct ntb_dev *, + int, u32)) +{ + int spad_idx; + u32 spad_val; + char *buf; + int pos, n; + ssize_t rc; + + if (!spad_write_fn) { + dev_dbg(&tc->ntb->dev, "no spad write fn\n"); + return -EINVAL; + } + + buf = kmalloc(size + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = simple_write_to_buffer(buf, size, offp, ubuf, size); + if (rc < 0) { + kfree(buf); + return rc; + } + + buf[size] = 0; + + n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + while (n == 2) { + rc = spad_write_fn(tc->ntb, spad_idx, spad_val); + if (rc) + break; + + n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + } + + if (n < 0) + rc = n; + + kfree(buf); + + return rc ? : size; +} + +static ssize_t tool_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->db_read); +} + +static ssize_t tool_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->db_set, + tc->ntb->ops->db_clear); +} + +static TOOL_FOPS_RDWR(tool_db_fops, + tool_db_read, + tool_db_write); + +static ssize_t tool_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->db_read_mask); +} + +static ssize_t tool_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->db_set_mask, + tc->ntb->ops->db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_mask_fops, + tool_mask_read, + tool_mask_write); + +static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_read); +} + +static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_set, + tc->ntb->ops->peer_db_clear); +} + +static TOOL_FOPS_RDWR(tool_peer_db_fops, + tool_peer_db_read, + tool_peer_db_write); + +static ssize_t tool_peer_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_read_mask); +} + +static ssize_t tool_peer_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_dbfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_set_mask, + tc->ntb->ops->peer_db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_peer_mask_fops, + tool_peer_mask_read, + tool_peer_mask_write); + +static ssize_t tool_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_read(tc, ubuf, size, offp, + tc->ntb->ops->spad_read); +} + +static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_write(tc, ubuf, size, offp, + tc->ntb->ops->spad_write); +} + +static TOOL_FOPS_RDWR(tool_spad_fops, + tool_spad_read, + tool_spad_write); + +static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_spad_read); +} + +static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_spadfn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_spad_write); +} + +static TOOL_FOPS_RDWR(tool_peer_spad_fops, + tool_peer_spad_read, + tool_peer_spad_write); + +static void tool_setup_dbgfs(struct tool_ctx *tc) +{ + /* This modules is useless without dbgfs... */ + if (!tool_dbgfs) { + tc->dbgfs = NULL; + return; + } + + tc->dbgfs = debugfs_create_dir(dev_name(&tc->ntb->dev), + tool_dbgfs); + if (!tc->dbgfs) + return; + + debugfs_create_file("db", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_db_fops); + + debugfs_create_file("mask", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_mask_fops); + + debugfs_create_file("peer_db", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_db_fops); + + debugfs_create_file("peer_mask", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_mask_fops); + + debugfs_create_file("spad", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_spad_fops); + + debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_peer_spad_fops); +} + +static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) +{ + struct tool_ctx *tc; + int rc; + + if (ntb_db_is_unsafe(ntb)) + dev_dbg(&ntb->dev, "doorbell is unsafe\n"); + + if (ntb_spad_is_unsafe(ntb)) + dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); + + tc = kmalloc(sizeof(*tc), GFP_KERNEL); + if (!tc) { + rc = -ENOMEM; + goto err_tc; + } + + tc->ntb = ntb; + + tool_setup_dbgfs(tc); + + rc = ntb_set_ctx(ntb, tc, &tool_ops); + if (rc) + goto err_ctx; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + ntb_link_event(ntb); + + return 0; + +err_ctx: + debugfs_remove_recursive(tc->dbgfs); + kfree(tc); +err_tc: + return rc; +} + +static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) +{ + struct tool_ctx *tc = ntb->ctx; + + ntb_clear_ctx(ntb); + ntb_link_disable(ntb); + + debugfs_remove_recursive(tc->dbgfs); + kfree(tc); +} + +static struct ntb_client tool_client = { + .ops = { + .probe = tool_probe, + .remove = tool_remove, + }, +}; + +static int __init tool_init(void) +{ + int rc; + + if (debugfs_initialized()) + tool_dbgfs = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&tool_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(tool_dbgfs); + return rc; +} +module_init(tool_init); + +static void __exit tool_exit(void) +{ + ntb_unregister_client(&tool_client); + debugfs_remove_recursive(tool_dbgfs); +} +module_exit(tool_exit); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8eb22c0ca7ce..7e2c43f701bc 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -535,8 +535,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -ENXIO; } - if (!access_ok(VERIFY_READ, p + in_len, in_size)) - return -EFAULT; if (in_len < sizeof(in_env)) copy = min_t(u32, sizeof(in_env) - in_len, in_size); else @@ -557,8 +555,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -EFAULT; } - if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) - return -EFAULT; if (out_len < sizeof(out_env)) copy = min_t(u32, sizeof(out_env) - out_len, out_size); else @@ -570,9 +566,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } buf_len = out_len + in_len; - if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) - return -EFAULT; - if (buf_len > ND_IOCTL_MAX_BUFLEN) { dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -706,8 +699,10 @@ int __init nvdimm_bus_init(void) nvdimm_major = rc; nd_class = class_create(THIS_MODULE, "nd"); - if (IS_ERR(nd_class)) + if (IS_ERR(nd_class)) { + rc = PTR_ERR(nd_class); goto err_class; + } return 0; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 1e2f57f6d297..6dc13e4de396 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -912,4 +912,11 @@ config PVPANIC a paravirtualized device provided by QEMU; it lets a virtual machine (guest) communicate panic events to the host. +config INTEL_PMC_IPC + tristate "Intel PMC IPC Driver" + ---help--- + This driver provides support for PMC control on some Intel platforms. + The PMC is an ARC processor which defines IPC commands for communication + with other entities in the CPU. + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index b3e54ed863c3..dda95a985321 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -59,3 +59,4 @@ obj-$(CONFIG_INTEL_SMARTCONNECT) += intel-smartconnect.o obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_ALIENWARE_WMI) += alienware-wmi.o +obj-$(CONFIG_INTEL_PMC_IPC) += intel_pmc_ipc.o diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 35de903cb506..ed317ccac4a2 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -307,7 +307,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }; static struct calling_interface_buffer *buffer; -static struct page *bufferpage; static DEFINE_MUTEX(buffer_mutex); static int hwswitch_state; @@ -424,45 +423,125 @@ static inline int dell_smi_error(int value) } } -/* Derived from information in DellWirelessCtl.cpp: - Class 17, select 11 is radio control. It returns an array of 32-bit values. - - Input byte 0 = 0: Wireless information - - result[0]: return code - result[1]: - Bit 0: Hardware switch supported - Bit 1: Wifi locator supported - Bit 2: Wifi is supported - Bit 3: Bluetooth is supported - Bit 4: WWAN is supported - Bit 5: Wireless keyboard supported - Bits 6-7: Reserved - Bit 8: Wifi is installed - Bit 9: Bluetooth is installed - Bit 10: WWAN is installed - Bits 11-15: Reserved - Bit 16: Hardware switch is on - Bit 17: Wifi is blocked - Bit 18: Bluetooth is blocked - Bit 19: WWAN is blocked - Bits 20-31: Reserved - result[2]: NVRAM size in bytes - result[3]: NVRAM format version number - - Input byte 0 = 2: Wireless switch configuration - result[0]: return code - result[1]: - Bit 0: Wifi controlled by switch - Bit 1: Bluetooth controlled by switch - Bit 2: WWAN controlled by switch - Bits 3-6: Reserved - Bit 7: Wireless switch config locked - Bit 8: Wifi locator enabled - Bits 9-14: Reserved - Bit 15: Wifi locator setting locked - Bits 16-31: Reserved -*/ +/* + * Derived from information in smbios-wireless-ctl: + * + * cbSelect 17, Value 11 + * + * Return Wireless Info + * cbArg1, byte0 = 0x00 + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 Info bit flags: + * + * 0 Hardware switch supported (1) + * 1 WiFi locator supported (1) + * 2 WLAN supported (1) + * 3 Bluetooth (BT) supported (1) + * 4 WWAN supported (1) + * 5 Wireless KBD supported (1) + * 6 Uw b supported (1) + * 7 WiGig supported (1) + * 8 WLAN installed (1) + * 9 BT installed (1) + * 10 WWAN installed (1) + * 11 Uw b installed (1) + * 12 WiGig installed (1) + * 13-15 Reserved (0) + * 16 Hardware (HW) switch is On (1) + * 17 WLAN disabled (1) + * 18 BT disabled (1) + * 19 WWAN disabled (1) + * 20 Uw b disabled (1) + * 21 WiGig disabled (1) + * 20-31 Reserved (0) + * + * cbRes3 NVRAM size in bytes + * cbRes4, byte 0 NVRAM format version number + * + * + * Set QuickSet Radio Disable Flag + * cbArg1, byte0 = 0x01 + * cbArg1, byte1 + * Radio ID value: + * 0 Radio Status + * 1 WLAN ID + * 2 BT ID + * 3 WWAN ID + * 4 UWB ID + * 5 WIGIG ID + * cbArg1, byte2 Flag bits: + * 0 QuickSet disables radio (1) + * 1-7 Reserved (0) + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 QuickSet (QS) radio disable bit map: + * 0 QS disables WLAN + * 1 QS disables BT + * 2 QS disables WWAN + * 3 QS disables UWB + * 4 QS disables WIGIG + * 5-31 Reserved (0) + * + * Wireless Switch Configuration + * cbArg1, byte0 = 0x02 + * + * cbArg1, byte1 + * Subcommand: + * 0 Get config + * 1 Set config + * 2 Set WiFi locator enable/disable + * cbArg1,byte2 + * Switch settings (if byte 1==1): + * 0 WLAN sw itch control (1) + * 1 BT sw itch control (1) + * 2 WWAN sw itch control (1) + * 3 UWB sw itch control (1) + * 4 WiGig sw itch control (1) + * 5-7 Reserved (0) + * cbArg1, byte2 Enable bits (if byte 1==2): + * 0 Enable WiFi locator (1) + * + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 QuickSet radio disable bit map: + * 0 WLAN controlled by sw itch (1) + * 1 BT controlled by sw itch (1) + * 2 WWAN controlled by sw itch (1) + * 3 UWB controlled by sw itch (1) + * 4 WiGig controlled by sw itch (1) + * 5-6 Reserved (0) + * 7 Wireless sw itch config locked (1) + * 8 WiFi locator enabled (1) + * 9-14 Reserved (0) + * 15 WiFi locator setting locked (1) + * 16-31 Reserved (0) + * + * Read Local Config Data (LCD) + * cbArg1, byte0 = 0x10 + * cbArg1, byte1 NVRAM index low byte + * cbArg1, byte2 NVRAM index high byte + * cbRes1 Standard return codes (0, -1, -2) + * cbRes2 4 bytes read from LCD[index] + * cbRes3 4 bytes read from LCD[index+4] + * cbRes4 4 bytes read from LCD[index+8] + * + * Write Local Config Data (LCD) + * cbArg1, byte0 = 0x11 + * cbArg1, byte1 NVRAM index low byte + * cbArg1, byte2 NVRAM index high byte + * cbArg2 4 bytes to w rite at LCD[index] + * cbArg3 4 bytes to w rite at LCD[index+4] + * cbArg4 4 bytes to w rite at LCD[index+8] + * cbRes1 Standard return codes (0, -1, -2) + * + * Populate Local Config Data from NVRAM + * cbArg1, byte0 = 0x12 + * cbRes1 Standard return codes (0, -1, -2) + * + * Commit Local Config Data to NVRAM + * cbArg1, byte0 = 0x13 + * cbRes1 Standard return codes (0, -1, -2) + */ static int dell_rfkill_set(void *data, bool blocked) { @@ -550,12 +629,21 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (status & BIT(4)) >> 4); seq_printf(s, "Bit 5 : Wireless keyboard supported: %lu\n", (status & BIT(5)) >> 5); + seq_printf(s, "Bit 6 : UWB supported: %lu\n", + (status & BIT(6)) >> 6); + seq_printf(s, "Bit 7 : WiGig supported: %lu\n", + (status & BIT(7)) >> 7); seq_printf(s, "Bit 8 : Wifi is installed: %lu\n", (status & BIT(8)) >> 8); seq_printf(s, "Bit 9 : Bluetooth is installed: %lu\n", (status & BIT(9)) >> 9); seq_printf(s, "Bit 10: WWAN is installed: %lu\n", (status & BIT(10)) >> 10); + seq_printf(s, "Bit 11: UWB installed: %lu\n", + (status & BIT(11)) >> 11); + seq_printf(s, "Bit 12: WiGig installed: %lu\n", + (status & BIT(12)) >> 12); + seq_printf(s, "Bit 16: Hardware switch is on: %lu\n", (status & BIT(16)) >> 16); seq_printf(s, "Bit 17: Wifi is blocked: %lu\n", @@ -564,6 +652,10 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (status & BIT(18)) >> 18); seq_printf(s, "Bit 19: WWAN is blocked: %lu\n", (status & BIT(19)) >> 19); + seq_printf(s, "Bit 20: UWB is blocked: %lu\n", + (status & BIT(20)) >> 20); + seq_printf(s, "Bit 21: WiGig is blocked: %lu\n", + (status & BIT(21)) >> 21); seq_printf(s, "\nhwswitch_state:\t0x%X\n", hwswitch_state); seq_printf(s, "Bit 0 : Wifi controlled by switch: %lu\n", @@ -572,6 +664,10 @@ static int dell_debugfs_show(struct seq_file *s, void *data) (hwswitch_state & BIT(1)) >> 1); seq_printf(s, "Bit 2 : WWAN controlled by switch: %lu\n", (hwswitch_state & BIT(2)) >> 2); + seq_printf(s, "Bit 3 : UWB controlled by switch: %lu\n", + (hwswitch_state & BIT(3)) >> 3); + seq_printf(s, "Bit 4 : WiGig controlled by switch: %lu\n", + (hwswitch_state & BIT(4)) >> 4); seq_printf(s, "Bit 7 : Wireless switch config locked: %lu\n", (hwswitch_state & BIT(7)) >> 7); seq_printf(s, "Bit 8 : Wifi locator enabled: %lu\n", @@ -1972,12 +2068,11 @@ static int __init dell_init(void) * Allocate buffer below 4GB for SMI data--only 32-bit physical addr * is passed to SMI handler. */ - bufferpage = alloc_page(GFP_KERNEL | GFP_DMA32); - if (!bufferpage) { + buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32); + if (!buffer) { ret = -ENOMEM; goto fail_buffer; } - buffer = page_address(bufferpage); ret = dell_setup_rfkill(); @@ -2034,7 +2129,7 @@ static int __init dell_init(void) fail_backlight: dell_cleanup_rfkill(); fail_rfkill: - free_page((unsigned long)bufferpage); + free_page((unsigned long)buffer); fail_buffer: platform_device_del(platform_device); fail_platform_device2: diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c new file mode 100644 index 000000000000..d734763dab69 --- /dev/null +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -0,0 +1,767 @@ +/* + * intel_pmc_ipc.c: Driver for the Intel PMC IPC mechanism + * + * (C) Copyright 2014-2015 Intel Corporation + * + * This driver is based on Intel SCU IPC driver(intel_scu_opc.c) by + * Sreedhara DS <sreedhara.ds@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * PMC running in ARC processor communicates with other entity running in IA + * core through IPC mechanism which in turn messaging between IA core ad PMC. + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/pm.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/pm_qos.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/sched.h> +#include <linux/atomic.h> +#include <linux/notifier.h> +#include <linux/suspend.h> +#include <linux/acpi.h> +#include <asm/intel_pmc_ipc.h> +#include <linux/mfd/lpc_ich.h> + +/* + * IPC registers + * The IA write to IPC_CMD command register triggers an interrupt to the ARC, + * The ARC handles the interrupt and services it, writing optional data to + * the IPC1 registers, updates the IPC_STS response register with the status. + */ +#define IPC_CMD 0x0 +#define IPC_CMD_MSI 0x100 +#define IPC_CMD_SIZE 16 +#define IPC_CMD_SUBCMD 12 +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ 0x4 +#define IPC_STATUS_ERR 0x2 +#define IPC_STATUS_BUSY 0x1 +#define IPC_SPTR 0x08 +#define IPC_DPTR 0x0C +#define IPC_WRITE_BUFFER 0x80 +#define IPC_READ_BUFFER 0x90 + +/* + * 16-byte buffer for sending data associated with IPC command. + */ +#define IPC_DATA_BUFFER_SIZE 16 + +#define IPC_LOOP_CNT 3000000 +#define IPC_MAX_SEC 3 + +#define IPC_TRIGGER_MODE_IRQ true + +/* exported resources from IFWI */ +#define PLAT_RESOURCE_IPC_INDEX 0 +#define PLAT_RESOURCE_IPC_SIZE 0x1000 +#define PLAT_RESOURCE_GCR_SIZE 0x1000 +#define PLAT_RESOURCE_PUNIT_DATA_INDEX 1 +#define PLAT_RESOURCE_PUNIT_INTER_INDEX 2 +#define PLAT_RESOURCE_ACPI_IO_INDEX 0 + +/* + * BIOS does not create an ACPI device for each PMC function, + * but exports multiple resources from one ACPI device(IPC) for + * multiple functions. This driver is responsible to create a + * platform device and to export resources for those functions. + */ +#define TCO_DEVICE_NAME "iTCO_wdt" +#define SMI_EN_OFFSET 0x30 +#define SMI_EN_SIZE 4 +#define TCO_BASE_OFFSET 0x60 +#define TCO_REGS_SIZE 16 +#define PUNIT_DEVICE_NAME "intel_punit_ipc" + +static const int iTCO_version = 3; + +static struct intel_pmc_ipc_dev { + struct device *dev; + void __iomem *ipc_base; + bool irq_mode; + int irq; + int cmd; + struct completion cmd_complete; + + /* The following PMC BARs share the same ACPI device with the IPC */ + void *acpi_io_base; + int acpi_io_size; + struct platform_device *tco_dev; + + /* gcr */ + void *gcr_base; + int gcr_size; + + /* punit */ + void *punit_base; + int punit_size; + void *punit_base2; + int punit_size2; + struct platform_device *punit_dev; +} ipcdev; + +static char *ipc_err_sources[] = { + [IPC_ERR_NONE] = + "no error", + [IPC_ERR_CMD_NOT_SUPPORTED] = + "command not supported", + [IPC_ERR_CMD_NOT_SERVICED] = + "command not serviced", + [IPC_ERR_UNABLE_TO_SERVICE] = + "unable to service", + [IPC_ERR_CMD_INVALID] = + "command invalid", + [IPC_ERR_CMD_FAILED] = + "command failed", + [IPC_ERR_EMSECURITY] = + "Invalid Battery", + [IPC_ERR_UNSIGNEDKERNEL] = + "Unsigned kernel", +}; + +/* Prevent concurrent calls to the PMC */ +static DEFINE_MUTEX(ipclock); + +static inline void ipc_send_command(u32 cmd) +{ + ipcdev.cmd = cmd; + if (ipcdev.irq_mode) { + reinit_completion(&ipcdev.cmd_complete); + cmd |= IPC_CMD_MSI; + } + writel(cmd, ipcdev.ipc_base + IPC_CMD); +} + +static inline u32 ipc_read_status(void) +{ + return readl(ipcdev.ipc_base + IPC_STATUS); +} + +static inline void ipc_data_writel(u32 data, u32 offset) +{ + writel(data, ipcdev.ipc_base + IPC_WRITE_BUFFER + offset); +} + +static inline u8 ipc_data_readb(u32 offset) +{ + return readb(ipcdev.ipc_base + IPC_READ_BUFFER + offset); +} + +static inline u32 ipc_data_readl(u32 offset) +{ + return readl(ipcdev.ipc_base + IPC_READ_BUFFER + offset); +} + +static int intel_pmc_ipc_check_status(void) +{ + int status; + int ret = 0; + + if (ipcdev.irq_mode) { + if (0 == wait_for_completion_timeout( + &ipcdev.cmd_complete, IPC_MAX_SEC * HZ)) + ret = -ETIMEDOUT; + } else { + int loop_count = IPC_LOOP_CNT; + + while ((ipc_read_status() & IPC_STATUS_BUSY) && --loop_count) + udelay(1); + if (loop_count == 0) + ret = -ETIMEDOUT; + } + + status = ipc_read_status(); + if (ret == -ETIMEDOUT) { + dev_err(ipcdev.dev, + "IPC timed out, TS=0x%x, CMD=0x%x\n", + status, ipcdev.cmd); + return ret; + } + + if (status & IPC_STATUS_ERR) { + int i; + + ret = -EIO; + i = (status >> IPC_CMD_SIZE) & 0xFF; + if (i < ARRAY_SIZE(ipc_err_sources)) + dev_err(ipcdev.dev, + "IPC failed: %s, STS=0x%x, CMD=0x%x\n", + ipc_err_sources[i], status, ipcdev.cmd); + else + dev_err(ipcdev.dev, + "IPC failed: unknown, STS=0x%x, CMD=0x%x\n", + status, ipcdev.cmd); + if ((i == IPC_ERR_UNSIGNEDKERNEL) || (i == IPC_ERR_EMSECURITY)) + ret = -EACCES; + } + + return ret; +} + +/* + * intel_pmc_ipc_simple_command + * @cmd: command + * @sub: sub type + */ +int intel_pmc_ipc_simple_command(int cmd, int sub) +{ + int ret; + + mutex_lock(&ipclock); + if (ipcdev.dev == NULL) { + mutex_unlock(&ipclock); + return -ENODEV; + } + ipc_send_command(sub << IPC_CMD_SUBCMD | cmd); + ret = intel_pmc_ipc_check_status(); + mutex_unlock(&ipclock); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_simple_command); + +/* + * intel_pmc_ipc_raw_cmd + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + * @sptr: data writing to SPTR register + * @dptr: data writing to DPTR register + */ +int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, + u32 outlen, u32 dptr, u32 sptr) +{ + u32 wbuf[4] = { 0 }; + int ret; + int i; + + if (inlen > IPC_DATA_BUFFER_SIZE || outlen > IPC_DATA_BUFFER_SIZE / 4) + return -EINVAL; + + mutex_lock(&ipclock); + if (ipcdev.dev == NULL) { + mutex_unlock(&ipclock); + return -ENODEV; + } + memcpy(wbuf, in, inlen); + writel(dptr, ipcdev.ipc_base + IPC_DPTR); + writel(sptr, ipcdev.ipc_base + IPC_SPTR); + /* The input data register is 32bit register and inlen is in Byte */ + for (i = 0; i < ((inlen + 3) / 4); i++) + ipc_data_writel(wbuf[i], 4 * i); + ipc_send_command((inlen << IPC_CMD_SIZE) | + (sub << IPC_CMD_SUBCMD) | cmd); + ret = intel_pmc_ipc_check_status(); + if (!ret) { + /* out is read from 32bit register and outlen is in 32bit */ + for (i = 0; i < outlen; i++) + *out++ = ipc_data_readl(4 * i); + } + mutex_unlock(&ipclock); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_raw_cmd); + +/* + * intel_pmc_ipc_command + * @cmd: command + * @sub: sub type + * @in: input data + * @inlen: input length in bytes + * @out: output data + * @outlen: output length in dwords + */ +int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, + u32 *out, u32 outlen) +{ + return intel_pmc_ipc_raw_cmd(cmd, sub, in, inlen, out, outlen, 0, 0); +} +EXPORT_SYMBOL_GPL(intel_pmc_ipc_command); + +static irqreturn_t ioc(int irq, void *dev_id) +{ + int status; + + if (ipcdev.irq_mode) { + status = ipc_read_status(); + writel(status | IPC_STATUS_IRQ, ipcdev.ipc_base + IPC_STATUS); + } + complete(&ipcdev.cmd_complete); + + return IRQ_HANDLED; +} + +static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + resource_size_t pci_resource; + int ret; + int len; + + ipcdev.dev = &pci_dev_get(pdev)->dev; + ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + ret = pci_request_regions(pdev, "intel_pmc_ipc"); + if (ret) + return ret; + + pci_resource = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + if (!pci_resource || !len) { + dev_err(&pdev->dev, "Failed to get resource\n"); + return -ENOMEM; + } + + init_completion(&ipcdev.cmd_complete); + + if (request_irq(pdev->irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + dev_err(&pdev->dev, "Failed to request irq\n"); + return -EBUSY; + } + + ipcdev.ipc_base = ioremap_nocache(pci_resource, len); + if (!ipcdev.ipc_base) { + dev_err(&pdev->dev, "Failed to ioremap ipc base\n"); + free_irq(pdev->irq, &ipcdev); + ret = -ENOMEM; + } + + return ret; +} + +static void ipc_pci_remove(struct pci_dev *pdev) +{ + free_irq(pdev->irq, &ipcdev); + pci_release_regions(pdev); + pci_dev_put(pdev); + iounmap(ipcdev.ipc_base); + ipcdev.dev = NULL; +} + +static const struct pci_device_id ipc_pci_ids[] = { + {PCI_VDEVICE(INTEL, 0x0a94), 0}, + {PCI_VDEVICE(INTEL, 0x1a94), 0}, + { 0,} +}; +MODULE_DEVICE_TABLE(pci, ipc_pci_ids); + +static struct pci_driver ipc_pci_driver = { + .name = "intel_pmc_ipc", + .id_table = ipc_pci_ids, + .probe = ipc_pci_probe, + .remove = ipc_pci_remove, +}; + +static ssize_t intel_pmc_ipc_simple_cmd_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int subcmd; + int cmd; + int ret; + + ret = sscanf(buf, "%d %d", &cmd, &subcmd); + if (ret != 2) { + dev_err(dev, "Error args\n"); + return -EINVAL; + } + + ret = intel_pmc_ipc_simple_command(cmd, subcmd); + if (ret) { + dev_err(dev, "command %d error with %d\n", cmd, ret); + return ret; + } + return (ssize_t)count; +} + +static ssize_t intel_pmc_ipc_northpeak_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int subcmd; + int ret; + + if (kstrtoul(buf, 0, &val)) + return -EINVAL; + + if (val) + subcmd = 1; + else + subcmd = 0; + ret = intel_pmc_ipc_simple_command(PMC_IPC_NORTHPEAK_CTRL, subcmd); + if (ret) { + dev_err(dev, "command north %d error with %d\n", subcmd, ret); + return ret; + } + return (ssize_t)count; +} + +static DEVICE_ATTR(simplecmd, S_IWUSR, + NULL, intel_pmc_ipc_simple_cmd_store); +static DEVICE_ATTR(northpeak, S_IWUSR, + NULL, intel_pmc_ipc_northpeak_store); + +static struct attribute *intel_ipc_attrs[] = { + &dev_attr_northpeak.attr, + &dev_attr_simplecmd.attr, + NULL +}; + +static const struct attribute_group intel_ipc_group = { + .attrs = intel_ipc_attrs, +}; + +#define PUNIT_RESOURCE_INTER 1 +static struct resource punit_res[] = { + /* Punit */ + { + .flags = IORESOURCE_MEM, + }, + { + .flags = IORESOURCE_MEM, + }, +}; + +#define TCO_RESOURCE_ACPI_IO 0 +#define TCO_RESOURCE_SMI_EN_IO 1 +#define TCO_RESOURCE_GCR_MEM 2 +static struct resource tco_res[] = { + /* ACPI - TCO */ + { + .flags = IORESOURCE_IO, + }, + /* ACPI - SMI */ + { + .flags = IORESOURCE_IO, + }, + /* GCS */ + { + .flags = IORESOURCE_MEM, + }, +}; + +static struct lpc_ich_info tco_info = { + .name = "Apollo Lake SoC", + .iTCO_version = 3, +}; + +static int ipc_create_punit_device(void) +{ + struct platform_device *pdev; + struct resource *res; + int ret; + + pdev = platform_device_alloc(PUNIT_DEVICE_NAME, -1); + if (!pdev) { + dev_err(ipcdev.dev, "Failed to alloc punit platform device\n"); + return -ENOMEM; + } + + pdev->dev.parent = ipcdev.dev; + + res = punit_res; + res->start = (resource_size_t)ipcdev.punit_base; + res->end = res->start + ipcdev.punit_size - 1; + + res = punit_res + PUNIT_RESOURCE_INTER; + res->start = (resource_size_t)ipcdev.punit_base2; + res->end = res->start + ipcdev.punit_size2 - 1; + + ret = platform_device_add_resources(pdev, punit_res, + ARRAY_SIZE(punit_res)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add platform punit resources\n"); + goto err; + } + + ret = platform_device_add(pdev); + if (ret) { + dev_err(ipcdev.dev, "Failed to add punit platform device\n"); + goto err; + } + ipcdev.punit_dev = pdev; + + return 0; +err: + platform_device_put(pdev); + return ret; +} + +static int ipc_create_tco_device(void) +{ + struct platform_device *pdev; + struct resource *res; + int ret; + + pdev = platform_device_alloc(TCO_DEVICE_NAME, -1); + if (!pdev) { + dev_err(ipcdev.dev, "Failed to alloc tco platform device\n"); + return -ENOMEM; + } + + pdev->dev.parent = ipcdev.dev; + + res = tco_res + TCO_RESOURCE_ACPI_IO; + res->start = (resource_size_t)ipcdev.acpi_io_base + TCO_BASE_OFFSET; + res->end = res->start + TCO_REGS_SIZE - 1; + + res = tco_res + TCO_RESOURCE_SMI_EN_IO; + res->start = (resource_size_t)ipcdev.acpi_io_base + SMI_EN_OFFSET; + res->end = res->start + SMI_EN_SIZE - 1; + + res = tco_res + TCO_RESOURCE_GCR_MEM; + res->start = (resource_size_t)ipcdev.gcr_base; + res->end = res->start + ipcdev.gcr_size - 1; + + ret = platform_device_add_resources(pdev, tco_res, ARRAY_SIZE(tco_res)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform resources\n"); + goto err; + } + + ret = platform_device_add_data(pdev, &tco_info, + sizeof(struct lpc_ich_info)); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform data\n"); + goto err; + } + + ret = platform_device_add(pdev); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform device\n"); + goto err; + } + ipcdev.tco_dev = pdev; + + return 0; +err: + platform_device_put(pdev); + return ret; +} + +static int ipc_create_pmc_devices(void) +{ + int ret; + + ret = ipc_create_tco_device(); + if (ret) { + dev_err(ipcdev.dev, "Failed to add tco platform device\n"); + return ret; + } + ret = ipc_create_punit_device(); + if (ret) { + dev_err(ipcdev.dev, "Failed to add punit platform device\n"); + platform_device_unregister(ipcdev.tco_dev); + } + return ret; +} + +static int ipc_plat_get_res(struct platform_device *pdev) +{ + struct resource *res; + void __iomem *addr; + int size; + + res = platform_get_resource(pdev, IORESOURCE_IO, + PLAT_RESOURCE_ACPI_IO_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get io resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.acpi_io_base = (void *)res->start; + ipcdev.acpi_io_size = size; + dev_info(&pdev->dev, "io res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_PUNIT_DATA_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get punit resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.punit_base = (void *)res->start; + ipcdev.punit_size = size; + dev_info(&pdev->dev, "punit data res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_PUNIT_INTER_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get punit inter resource\n"); + return -ENXIO; + } + size = resource_size(res); + ipcdev.punit_base2 = (void *)res->start; + ipcdev.punit_size2 = size; + dev_info(&pdev->dev, "punit interface res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (!res) { + dev_err(&pdev->dev, "Failed to get ipc resource\n"); + return -ENXIO; + } + size = PLAT_RESOURCE_IPC_SIZE; + if (!request_mem_region(res->start, size, pdev->name)) { + dev_err(&pdev->dev, "Failed to request ipc resource\n"); + return -EBUSY; + } + addr = ioremap_nocache(res->start, size); + if (!addr) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + release_mem_region(res->start, size); + return -ENOMEM; + } + ipcdev.ipc_base = addr; + + ipcdev.gcr_base = (void *)(res->start + size); + ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE; + dev_info(&pdev->dev, "ipc res: %llx %x\n", + (long long)res->start, (int)resource_size(res)); + + return 0; +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id ipc_acpi_ids[] = { + { "INT34D2", 0}, + { } +}; +MODULE_DEVICE_TABLE(acpi, ipc_acpi_ids); +#endif + +static int ipc_plat_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret; + + ipcdev.dev = &pdev->dev; + ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + init_completion(&ipcdev.cmd_complete); + + ipcdev.irq = platform_get_irq(pdev, 0); + if (ipcdev.irq < 0) { + dev_err(&pdev->dev, "Failed to get irq\n"); + return -EINVAL; + } + + ret = ipc_plat_get_res(pdev); + if (ret) { + dev_err(&pdev->dev, "Failed to request resource\n"); + return ret; + } + + ret = ipc_create_pmc_devices(); + if (ret) { + dev_err(&pdev->dev, "Failed to create pmc devices\n"); + goto err_device; + } + + if (request_irq(ipcdev.irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + dev_err(&pdev->dev, "Failed to request irq\n"); + ret = -EBUSY; + goto err_irq; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &intel_ipc_group); + if (ret) { + dev_err(&pdev->dev, "Failed to create sysfs group %d\n", + ret); + goto err_sys; + } + + return 0; +err_sys: + free_irq(ipcdev.irq, &ipcdev); +err_irq: + platform_device_unregister(ipcdev.tco_dev); + platform_device_unregister(ipcdev.punit_dev); +err_device: + iounmap(ipcdev.ipc_base); + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (res) + release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + return ret; +} + +static int ipc_plat_remove(struct platform_device *pdev) +{ + struct resource *res; + + sysfs_remove_group(&pdev->dev.kobj, &intel_ipc_group); + free_irq(ipcdev.irq, &ipcdev); + platform_device_unregister(ipcdev.tco_dev); + platform_device_unregister(ipcdev.punit_dev); + iounmap(ipcdev.ipc_base); + res = platform_get_resource(pdev, IORESOURCE_MEM, + PLAT_RESOURCE_IPC_INDEX); + if (res) + release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + ipcdev.dev = NULL; + return 0; +} + +static struct platform_driver ipc_plat_driver = { + .remove = ipc_plat_remove, + .probe = ipc_plat_probe, + .driver = { + .name = "pmc-ipc-plat", + .acpi_match_table = ACPI_PTR(ipc_acpi_ids), + }, +}; + +static int __init intel_pmc_ipc_init(void) +{ + int ret; + + ret = platform_driver_register(&ipc_plat_driver); + if (ret) { + pr_err("Failed to register PMC ipc platform driver\n"); + return ret; + } + ret = pci_register_driver(&ipc_pci_driver); + if (ret) { + pr_err("Failed to register PMC ipc pci driver\n"); + platform_driver_unregister(&ipc_plat_driver); + return ret; + } + return ret; +} + +static void __exit intel_pmc_ipc_exit(void) +{ + pci_unregister_driver(&ipc_pci_driver); + platform_driver_unregister(&ipc_plat_driver); +} + +MODULE_AUTHOR("Zha Qipeng <qipeng.zha@intel.com>"); +MODULE_DESCRIPTION("Intel PMC IPC driver"); +MODULE_LICENSE("GPL"); + +/* Some modules are dependent on this, so init earlier */ +fs_initcall(intel_pmc_ipc_init); +module_exit(intel_pmc_ipc_exit); diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c index e36542564131..89aa976f0ab2 100644 --- a/drivers/platform/x86/tc1100-wmi.c +++ b/drivers/platform/x86/tc1100-wmi.c @@ -82,7 +82,7 @@ static int get_state(u32 *out, u8 instance) tmp = 0; } - if (result.length > 0 && result.pointer) + if (result.length > 0) kfree(result.pointer); switch (instance) { diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab8..49c1720df59a 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas <bjorn.helgaas@hp.com> */ -#include <linux/acpi.h> #include <linux/pnp.h> #include <linux/device.h> #include <linux/init.h> @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 5e343bab9458..28c711f0ac6b 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -41,6 +41,19 @@ config STE_MODEM_RPROC This can be either built-in or a loadable module. If unsure say N. +config WKUP_M3_RPROC + tristate "AMx3xx Wakeup M3 remoteproc support" + depends on SOC_AM33XX || SOC_AM43XX + select REMOTEPROC + help + Say y here to support Wakeup M3 remote processor on TI AM33xx + and AM43xx family of SoCs. + + Required for Suspend-to-RAM on AM33xx and AM43xx SoCs. Also needed + for deep CPUIdle states on AM33xx SoCs. Allows for loading of the + firmware onto these remote processors. + If unsure say N. + config DA8XX_REMOTEPROC tristate "DA8xx/OMAP-L13x remoteproc support" depends on ARCH_DAVINCI_DA8XX diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index ac2ff75686d2..81b04d1e2e58 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -9,4 +9,5 @@ remoteproc-y += remoteproc_virtio.o remoteproc-y += remoteproc_elf_loader.o obj-$(CONFIG_OMAP_REMOTEPROC) += omap_remoteproc.o obj-$(CONFIG_STE_MODEM_RPROC) += ste_modem_rproc.o +obj-$(CONFIG_WKUP_M3_RPROC) += wkup_m3_rproc.o obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o diff --git a/drivers/remoteproc/da8xx_remoteproc.c b/drivers/remoteproc/da8xx_remoteproc.c index f8d6a0661c14..009e56f67de2 100644 --- a/drivers/remoteproc/da8xx_remoteproc.c +++ b/drivers/remoteproc/da8xx_remoteproc.c @@ -26,8 +26,7 @@ static char *da8xx_fw_name; module_param(da8xx_fw_name, charp, S_IRUGO); MODULE_PARM_DESC(da8xx_fw_name, - "\n\t\tName of DSP firmware file in /lib/firmware" - " (if not specified defaults to 'rproc-dsp-fw')"); + "Name of DSP firmware file in /lib/firmware (if not specified defaults to 'rproc-dsp-fw')"); /* * OMAP-L138 Technical References: diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 11cdb119e4f3..8b3130f22b42 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -44,6 +44,9 @@ #include "remoteproc_internal.h" +static DEFINE_MUTEX(rproc_list_mutex); +static LIST_HEAD(rproc_list); + typedef int (*rproc_handle_resources_t)(struct rproc *rproc, struct resource_table *table, int len); typedef int (*rproc_handle_resource_t)(struct rproc *rproc, @@ -132,32 +135,48 @@ static void rproc_disable_iommu(struct rproc *rproc) iommu_detach_device(domain, dev); iommu_domain_free(domain); - - return; } -/* +/** + * rproc_da_to_va() - lookup the kernel virtual address for a remoteproc address + * @rproc: handle of a remote processor + * @da: remoteproc device address to translate + * @len: length of the memory region @da is pointing to + * * Some remote processors will ask us to allocate them physically contiguous * memory regions (which we call "carveouts"), and map them to specific - * device addresses (which are hardcoded in the firmware). + * device addresses (which are hardcoded in the firmware). They may also have + * dedicated memory regions internal to the processors, and use them either + * exclusively or alongside carveouts. * * They may then ask us to copy objects into specific device addresses (e.g. * code/data sections) or expose us certain symbols in other device address * (e.g. their trace buffer). * - * This function is an internal helper with which we can go over the allocated - * carveouts and translate specific device address to kernel virtual addresses - * so we can access the referenced memory. + * This function is a helper function with which we can go over the allocated + * carveouts and translate specific device addresses to kernel virtual addresses + * so we can access the referenced memory. This function also allows to perform + * translations on the internal remoteproc memory regions through a platform + * implementation specific da_to_va ops, if present. + * + * The function returns a valid kernel address on success or NULL on failure. * * Note: phys_to_virt(iommu_iova_to_phys(rproc->domain, da)) will work too, * but only on kernel direct mapped RAM memory. Instead, we're just using - * here the output of the DMA API, which should be more correct. + * here the output of the DMA API for the carveouts, which should be more + * correct. */ void *rproc_da_to_va(struct rproc *rproc, u64 da, int len) { struct rproc_mem_entry *carveout; void *ptr = NULL; + if (rproc->ops->da_to_va) { + ptr = rproc->ops->da_to_va(rproc, da, len); + if (ptr) + goto out; + } + list_for_each_entry(carveout, &rproc->carveouts, node) { int offset = da - carveout->da; @@ -174,6 +193,7 @@ void *rproc_da_to_va(struct rproc *rproc, u64 da, int len) break; } +out: return ptr; } EXPORT_SYMBOL(rproc_da_to_va); @@ -411,10 +431,8 @@ static int rproc_handle_trace(struct rproc *rproc, struct fw_rsc_trace *rsc, } trace = kzalloc(sizeof(*trace), GFP_KERNEL); - if (!trace) { - dev_err(dev, "kzalloc trace failed\n"); + if (!trace) return -ENOMEM; - } /* set the trace buffer dma properties */ trace->len = rsc->len; @@ -489,10 +507,8 @@ static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc, } mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); - if (!mapping) { - dev_err(dev, "kzalloc mapping failed\n"); + if (!mapping) return -ENOMEM; - } ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags); if (ret) { @@ -565,10 +581,8 @@ static int rproc_handle_carveout(struct rproc *rproc, rsc->da, rsc->pa, rsc->len, rsc->flags); carveout = kzalloc(sizeof(*carveout), GFP_KERNEL); - if (!carveout) { - dev_err(dev, "kzalloc carveout failed\n"); + if (!carveout) return -ENOMEM; - } va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL); if (!va) { @@ -768,7 +782,8 @@ static void rproc_resource_cleanup(struct rproc *rproc) /* clean up carveout allocations */ list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { - dma_free_coherent(dev->parent, entry->len, entry->va, entry->dma); + dma_free_coherent(dev->parent, entry->len, entry->va, + entry->dma); list_del(&entry->node); kfree(entry); } @@ -808,9 +823,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) /* look for the resource table */ table = rproc_find_rsc_table(rproc, fw, &tablesz); - if (!table) { + if (!table) goto clean_up; - } /* Verify that resource table in loaded fw is unchanged */ if (rproc->table_csum != crc32(0, table, tablesz)) { @@ -911,7 +925,8 @@ static void rproc_fw_config_virtio(const struct firmware *fw, void *context) /* count the number of notify-ids */ rproc->max_notifyid = -1; - ret = rproc_handle_resources(rproc, tablesz, rproc_count_vrings_handler); + ret = rproc_handle_resources(rproc, tablesz, + rproc_count_vrings_handler); if (ret) goto out; @@ -1152,6 +1167,50 @@ out: EXPORT_SYMBOL(rproc_shutdown); /** + * rproc_get_by_phandle() - find a remote processor by phandle + * @phandle: phandle to the rproc + * + * Finds an rproc handle using the remote processor's phandle, and then + * return a handle to the rproc. + * + * This function increments the remote processor's refcount, so always + * use rproc_put() to decrement it back once rproc isn't needed anymore. + * + * Returns the rproc handle on success, and NULL on failure. + */ +#ifdef CONFIG_OF +struct rproc *rproc_get_by_phandle(phandle phandle) +{ + struct rproc *rproc = NULL, *r; + struct device_node *np; + + np = of_find_node_by_phandle(phandle); + if (!np) + return NULL; + + mutex_lock(&rproc_list_mutex); + list_for_each_entry(r, &rproc_list, node) { + if (r->dev.parent && r->dev.parent->of_node == np) { + rproc = r; + get_device(&rproc->dev); + break; + } + } + mutex_unlock(&rproc_list_mutex); + + of_node_put(np); + + return rproc; +} +#else +struct rproc *rproc_get_by_phandle(phandle phandle) +{ + return NULL; +} +#endif +EXPORT_SYMBOL(rproc_get_by_phandle); + +/** * rproc_add() - register a remote processor * @rproc: the remote processor handle to register * @@ -1180,6 +1239,11 @@ int rproc_add(struct rproc *rproc) if (ret < 0) return ret; + /* expose to rproc_get_by_phandle users */ + mutex_lock(&rproc_list_mutex); + list_add(&rproc->node, &rproc_list); + mutex_unlock(&rproc_list_mutex); + dev_info(dev, "%s is available\n", rproc->name); dev_info(dev, "Note: remoteproc is still under development and considered experimental.\n"); @@ -1268,10 +1332,8 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, name_len = strlen(name) + strlen(template) - 2 + 1; rproc = kzalloc(sizeof(struct rproc) + len + name_len, GFP_KERNEL); - if (!rproc) { - dev_err(dev, "%s: kzalloc failed\n", __func__); + if (!rproc) return NULL; - } if (!firmware) { p = (char *)rproc + sizeof(struct rproc) + len; @@ -1369,6 +1431,11 @@ int rproc_del(struct rproc *rproc) /* Free the copy of the resource table */ kfree(rproc->cached_table); + /* the rproc is downref'ed as soon as it's removed from the klist */ + mutex_lock(&rproc_list_mutex); + list_del(&rproc->node); + mutex_unlock(&rproc_list_mutex); + device_del(&rproc->dev); return 0; diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index 70701a50ddfa..8041b95cb058 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -35,7 +35,7 @@ struct rproc; * @get_boot_addr: get boot address to entry point specified in firmware */ struct rproc_fw_ops { - struct resource_table *(*find_rsc_table) (struct rproc *rproc, + struct resource_table *(*find_rsc_table)(struct rproc *rproc, const struct firmware *fw, int *tablesz); struct resource_table *(*find_loaded_rsc_table)(struct rproc *rproc, diff --git a/drivers/remoteproc/ste_modem_rproc.c b/drivers/remoteproc/ste_modem_rproc.c index dd193f35a1ff..53dc17bdd54e 100644 --- a/drivers/remoteproc/ste_modem_rproc.c +++ b/drivers/remoteproc/ste_modem_rproc.c @@ -67,8 +67,7 @@ static int sproc_load_segments(struct rproc *rproc, const struct firmware *fw) static const struct ste_toc_entry *sproc_find_rsc_entry(const void *data) { int i; - const struct ste_toc *toc; - toc = data; + const struct ste_toc *toc = data; /* Search the table for the resource table */ for (i = 0; i < SPROC_MAX_TOC_ENTRIES && @@ -230,6 +229,7 @@ static int sproc_start(struct rproc *rproc) static int sproc_stop(struct rproc *rproc) { struct sproc *sproc = rproc->priv; + sproc_dbg(sproc, "stop ste-modem\n"); return sproc->mdev->ops.power(sproc->mdev, false); diff --git a/drivers/remoteproc/wkup_m3_rproc.c b/drivers/remoteproc/wkup_m3_rproc.c new file mode 100644 index 000000000000..edf81819cce1 --- /dev/null +++ b/drivers/remoteproc/wkup_m3_rproc.c @@ -0,0 +1,257 @@ +/* + * TI AMx3 Wakeup M3 Remote Processor driver + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach <d-gerlach@ti.com> + * Suman Anna <s-anna@ti.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/remoteproc.h> + +#include <linux/platform_data/wkup_m3.h> + +#include "remoteproc_internal.h" + +#define WKUPM3_MEM_MAX 2 + +/** + * struct wkup_m3_mem - WkupM3 internal memory structure + * @cpu_addr: MPU virtual address of the memory region + * @bus_addr: Bus address used to access the memory region + * @dev_addr: Device address from Wakeup M3 view + * @size: Size of the memory region + */ +struct wkup_m3_mem { + void __iomem *cpu_addr; + phys_addr_t bus_addr; + u32 dev_addr; + size_t size; +}; + +/** + * struct wkup_m3_rproc - WkupM3 remote processor state + * @rproc: rproc handle + * @pdev: pointer to platform device + * @mem: WkupM3 memory information + */ +struct wkup_m3_rproc { + struct rproc *rproc; + struct platform_device *pdev; + struct wkup_m3_mem mem[WKUPM3_MEM_MAX]; +}; + +static int wkup_m3_rproc_start(struct rproc *rproc) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + struct platform_device *pdev = wkupm3->pdev; + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev_get_platdata(dev); + + if (pdata->deassert_reset(pdev, pdata->reset_name)) { + dev_err(dev, "Unable to reset wkup_m3!\n"); + return -ENODEV; + } + + return 0; +} + +static int wkup_m3_rproc_stop(struct rproc *rproc) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + struct platform_device *pdev = wkupm3->pdev; + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev_get_platdata(dev); + + if (pdata->assert_reset(pdev, pdata->reset_name)) { + dev_err(dev, "Unable to assert reset of wkup_m3!\n"); + return -ENODEV; + } + + return 0; +} + +static void *wkup_m3_rproc_da_to_va(struct rproc *rproc, u64 da, int len) +{ + struct wkup_m3_rproc *wkupm3 = rproc->priv; + void *va = NULL; + int i; + u32 offset; + + if (len <= 0) + return NULL; + + for (i = 0; i < WKUPM3_MEM_MAX; i++) { + if (da >= wkupm3->mem[i].dev_addr && da + len <= + wkupm3->mem[i].dev_addr + wkupm3->mem[i].size) { + offset = da - wkupm3->mem[i].dev_addr; + /* __force to make sparse happy with type conversion */ + va = (__force void *)(wkupm3->mem[i].cpu_addr + offset); + break; + } + } + + return va; +} + +static struct rproc_ops wkup_m3_rproc_ops = { + .start = wkup_m3_rproc_start, + .stop = wkup_m3_rproc_stop, + .da_to_va = wkup_m3_rproc_da_to_va, +}; + +static const struct of_device_id wkup_m3_rproc_of_match[] = { + { .compatible = "ti,am3352-wkup-m3", }, + { .compatible = "ti,am4372-wkup-m3", }, + {}, +}; + +static int wkup_m3_rproc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct wkup_m3_platform_data *pdata = dev->platform_data; + /* umem always needs to be processed first */ + const char *mem_names[WKUPM3_MEM_MAX] = { "umem", "dmem" }; + struct wkup_m3_rproc *wkupm3; + const char *fw_name; + struct rproc *rproc; + struct resource *res; + const __be32 *addrp; + u32 l4_offset = 0; + u64 size; + int ret; + int i; + + if (!(pdata && pdata->deassert_reset && pdata->assert_reset && + pdata->reset_name)) { + dev_err(dev, "Platform data missing!\n"); + return -ENODEV; + } + + ret = of_property_read_string(dev->of_node, "ti,pm-firmware", + &fw_name); + if (ret) { + dev_err(dev, "No firmware filename given\n"); + return -ENODEV; + } + + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "pm_runtime_get_sync() failed\n"); + goto err; + } + + rproc = rproc_alloc(dev, "wkup_m3", &wkup_m3_rproc_ops, + fw_name, sizeof(*wkupm3)); + if (!rproc) { + ret = -ENOMEM; + goto err; + } + + wkupm3 = rproc->priv; + wkupm3->rproc = rproc; + wkupm3->pdev = pdev; + + for (i = 0; i < ARRAY_SIZE(mem_names); i++) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + mem_names[i]); + wkupm3->mem[i].cpu_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(wkupm3->mem[i].cpu_addr)) { + dev_err(&pdev->dev, "devm_ioremap_resource failed for resource %d\n", + i); + ret = PTR_ERR(wkupm3->mem[i].cpu_addr); + goto err; + } + wkupm3->mem[i].bus_addr = res->start; + wkupm3->mem[i].size = resource_size(res); + addrp = of_get_address(dev->of_node, i, &size, NULL); + /* + * The wkupm3 has umem at address 0 in its view, so the device + * addresses for each memory region is computed as a relative + * offset of the bus address for umem, and therefore needs to be + * processed first. + */ + if (!strcmp(mem_names[i], "umem")) + l4_offset = be32_to_cpu(*addrp); + wkupm3->mem[i].dev_addr = be32_to_cpu(*addrp) - l4_offset; + } + + dev_set_drvdata(dev, rproc); + + ret = rproc_add(rproc); + if (ret) { + dev_err(dev, "rproc_add failed\n"); + goto err_put_rproc; + } + + return 0; + +err_put_rproc: + rproc_put(rproc); +err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; +} + +static int wkup_m3_rproc_remove(struct platform_device *pdev) +{ + struct rproc *rproc = platform_get_drvdata(pdev); + + rproc_del(rproc); + rproc_put(rproc); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_PM +static int wkup_m3_rpm_suspend(struct device *dev) +{ + return -EBUSY; +} + +static int wkup_m3_rpm_resume(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops wkup_m3_rproc_pm_ops = { + SET_RUNTIME_PM_OPS(wkup_m3_rpm_suspend, wkup_m3_rpm_resume, NULL) +}; + +static struct platform_driver wkup_m3_rproc_driver = { + .probe = wkup_m3_rproc_probe, + .remove = wkup_m3_rproc_remove, + .driver = { + .name = "wkup_m3_rproc", + .of_match_table = wkup_m3_rproc_of_match, + .pm = &wkup_m3_rproc_pm_ops, + }, +}; + +module_platform_driver(wkup_m3_rproc_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("TI Wakeup M3 remote processor control driver"); +MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>"); diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 6f1fa1773e76..f8d8fdb26b72 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -65,6 +65,7 @@ struct virtio_ccw_device { bool is_thinint; bool going_away; bool device_lost; + unsigned int config_ready; void *airq_info; }; @@ -833,8 +834,11 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, if (ret) goto out_free; - memcpy(vcdev->config, config_area, sizeof(vcdev->config)); - memcpy(buf, &vcdev->config[offset], len); + memcpy(vcdev->config, config_area, offset + len); + if (buf) + memcpy(buf, &vcdev->config[offset], len); + if (vcdev->config_ready < offset + len) + vcdev->config_ready = offset + len; out_free: kfree(config_area); @@ -857,6 +861,9 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, if (!config_area) goto out_free; + /* Make sure we don't overwrite fields. */ + if (vcdev->config_ready < offset) + virtio_ccw_get_config(vdev, 0, NULL, offset); memcpy(&vcdev->config[offset], buf, len); /* Write the config area to the host. */ memcpy(config_area, vcdev->config, sizeof(vcdev->config)); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 4a484d60be0d..b749026aa592 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1191,7 +1191,7 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - if (cmd->tag == abts->exchange_addr_to_abort) { + if (se_cmd->tag == abts->exchange_addr_to_abort) { lun = cmd->unpacked_lun; found_lun = true; break; @@ -1728,9 +1728,8 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, if (unlikely(cmd->aborted)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, - "qla_target(%d): terminating exchange " - "for aborted cmd=%p (se_cmd=%p, tag=%d)", vha->vp_idx, cmd, - se_cmd, cmd->tag); + "qla_target(%d): terminating exchange for aborted cmd=%p (se_cmd=%p, tag=%lld)", + vha->vp_idx, cmd, se_cmd, se_cmd->tag); cmd->state = QLA_TGT_STATE_ABORTED; cmd->cmd_flags |= BIT_6; @@ -1765,18 +1764,17 @@ static int qlt_pre_xmit_response(struct qla_tgt_cmd *cmd, if (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { prm->residual = se_cmd->residual_count; ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x305c, - "Residual underflow: %d (tag %d, " - "op %x, bufflen %d, rq_result %x)\n", prm->residual, - cmd->tag, se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->bufflen, prm->rq_result); + "Residual underflow: %d (tag %lld, op %x, bufflen %d, rq_result %x)\n", + prm->residual, se_cmd->tag, + se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, + cmd->bufflen, prm->rq_result); prm->rq_result |= SS_RESIDUAL_UNDER; } else if (se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { prm->residual = se_cmd->residual_count; ql_dbg(ql_dbg_io, vha, 0x305d, - "Residual overflow: %d (tag %d, " - "op %x, bufflen %d, rq_result %x)\n", prm->residual, - cmd->tag, se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->bufflen, prm->rq_result); + "Residual overflow: %d (tag %lld, op %x, bufflen %d, rq_result %x)\n", + prm->residual, se_cmd->tag, se_cmd->t_task_cdb ? + se_cmd->t_task_cdb[0] : 0, cmd->bufflen, prm->rq_result); prm->rq_result |= SS_RESIDUAL_OVER; } @@ -1849,7 +1847,7 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) == 50) { *xmit_type &= ~QLA_TGT_XMIT_STATUS; ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf015, - "Dropping cmd %p (tag %d) status", cmd, cmd->tag); + "Dropping cmd %p (tag %d) status", cmd, se_cmd->tag); } #endif /* @@ -1873,7 +1871,7 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf016, "Cutting cmd %p (tag %d) buffer" " tail to len %d, sg_cnt %d (cmd->bufflen %d," - " cmd->sg_cnt %d)", cmd, cmd->tag, tot_len, leave, + " cmd->sg_cnt %d)", cmd, se_cmd->tag, tot_len, leave, cmd->bufflen, cmd->sg_cnt); cmd->bufflen = tot_len; @@ -1885,13 +1883,13 @@ static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf017, "Cutting cmd %p (tag %d) buffer head " - "to offset %d (cmd->bufflen %d)", cmd, cmd->tag, offset, + "to offset %d (cmd->bufflen %d)", cmd, se_cmd->tag, offset, cmd->bufflen); if (offset == 0) *xmit_type &= ~QLA_TGT_XMIT_DATA; else if (qlt_set_data_offset(cmd, offset)) { ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf018, - "qlt_set_data_offset() failed (tag %d)", cmd->tag); + "qlt_set_data_offset() failed (tag %d)", se_cmd->tag); } } } @@ -3194,7 +3192,7 @@ skip_term: return; } else if (cmd->state == QLA_TGT_STATE_ABORTED) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, - "Aborted command %p (tag %d) finished\n", cmd, cmd->tag); + "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, "qla_target(%d): A command in state (%d) should " @@ -3266,7 +3264,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) goto out_term; cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; - cmd->tag = atio->u.isp24.exchange_addr; + cmd->se_cmd.tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( (struct scsi_lun *)&atio->u.isp24.fcp_cmnd.lun); @@ -3893,9 +3891,8 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, resp = 1; } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf064, - "qla_target(%d): SRR for in data for cmd " - "without them (tag %d, SCSI status %d), " - "reject", vha->vp_idx, cmd->tag, + "qla_target(%d): SRR for in data for cmd without them (tag %lld, SCSI status %d), reject", + vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); goto out_reject; } @@ -3929,10 +3926,8 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, } } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf066, - "qla_target(%d): SRR for out data for cmd " - "without them (tag %d, SCSI status %d), " - "reject", vha->vp_idx, cmd->tag, - cmd->se_cmd.scsi_status); + "qla_target(%d): SRR for out data for cmd without them (tag %lld, SCSI status %d), reject", + vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); goto out_reject; } break; @@ -4053,10 +4048,9 @@ restart: cmd->sg = se_cmd->t_data_sg; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02c, - "SRR cmd %p (se_cmd %p, tag %d, op %x), " - "sg_cnt=%d, offset=%d", cmd, &cmd->se_cmd, cmd->tag, - se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, - cmd->sg_cnt, cmd->offset); + "SRR cmd %p (se_cmd %p, tag %lld, op %x), sg_cnt=%d, offset=%d", + cmd, &cmd->se_cmd, se_cmd->tag, se_cmd->t_task_cdb ? + se_cmd->t_task_cdb[0] : 0, cmd->sg_cnt, cmd->offset); qlt_handle_srr(vha, sctio, imm); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 332086776dfe..985d76dd706b 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -924,7 +924,6 @@ struct qla_tgt_cmd { int sg_cnt; /* SG segments count */ int bufflen; /* cmd buffer length */ int offset; - uint32_t tag; uint32_t unpacked_lun; enum dma_data_direction dma_data_direction; uint32_t reset_count; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index e32d24ec7a11..d9a8c6084346 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -44,7 +44,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "qla_def.h" @@ -54,9 +53,6 @@ static struct workqueue_struct *tcm_qla2xxx_free_wq; static struct workqueue_struct *tcm_qla2xxx_cmd_wq; -static const struct target_core_fabric_ops tcm_qla2xxx_ops; -static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops; - /* * Parse WWN. * If strict, we require lower-case hex and colon separators to be sure @@ -191,23 +187,6 @@ static char *tcm_qla2xxx_npiv_get_fabric_name(void) return "qla2xxx_npiv"; } -static u8 tcm_qla2xxx_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - u8 proto_id; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - proto_id = fc_get_fabric_proto_ident(se_tpg); - break; - } - - return proto_id; -} - static char *tcm_qla2xxx_get_fabric_wwn(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -224,78 +203,6 @@ static u16 tcm_qla2xxx_get_tag(struct se_portal_group *se_tpg) return tpg->lport_tpgt; } -static u32 tcm_qla2xxx_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 tcm_qla2xxx_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - int ret = 0; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - ret = fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - break; - } - - return ret; -} - -static u32 tcm_qla2xxx_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - int ret = 0; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - ret = fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - break; - } - - return ret; -} - -static char *tcm_qla2xxx_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - char *tid = NULL; - - switch (lport->lport_proto_id) { - case SCSI_PROTOCOL_FCP: - default: - tid = fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - break; - } - - return tid; -} - static int tcm_qla2xxx_check_demo_mode(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -344,29 +251,6 @@ static int tcm_qla2xxx_check_prot_fabric_only(struct se_portal_group *se_tpg) return tpg->tpg_attrib.fabric_prot_type; } -static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( - struct se_portal_group *se_tpg) -{ - struct tcm_qla2xxx_nacl *nacl; - - nacl = kzalloc(sizeof(struct tcm_qla2xxx_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct tcm_qla2xxx_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; -} - -static void tcm_qla2xxx_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct tcm_qla2xxx_nacl *nacl = container_of(se_nacl, - struct tcm_qla2xxx_nacl, se_node_acl); - kfree(nacl); -} - static u32 tcm_qla2xxx_tpg_get_inst_index(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -430,7 +314,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) cmd->cmd_flags |= BIT_14; } - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } /* tcm_qla2xxx_release_cmd - Callback from TCM Core to release underlying @@ -534,19 +418,6 @@ static void tcm_qla2xxx_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 tcm_qla2xxx_get_task_tag(struct se_cmd *se_cmd) -{ - struct qla_tgt_cmd *cmd; - - /* check for task mgmt cmd */ - if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - return 0xffffffff; - - cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - - return cmd->tag; -} - static int tcm_qla2xxx_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -827,17 +698,6 @@ static void tcm_qla2xxx_release_session(struct kref *kref) qlt_unreg_sess(se_sess->fabric_sess_ptr); } -static void tcm_qla2xxx_put_session(struct se_session *se_sess) -{ - struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; - struct qla_hw_data *ha = sess->vha->hw; - unsigned long flags; - - spin_lock_irqsave(&ha->hardware_lock, flags); - kref_put(&se_sess->sess_kref, tcm_qla2xxx_release_session); - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} - static void tcm_qla2xxx_put_sess(struct qla_tgt_sess *sess) { if (!sess) @@ -853,53 +713,20 @@ static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) target_sess_cmd_list_set_waiting(sess->se_sess); } -static struct se_node_acl *tcm_qla2xxx_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int tcm_qla2xxx_init_nodeacl(struct se_node_acl *se_nacl, + const char *name) { - struct se_node_acl *se_nacl, *se_nacl_new; - struct tcm_qla2xxx_nacl *nacl; + struct tcm_qla2xxx_nacl *nacl = + container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); u64 wwnn; - u32 qla2xxx_nexus_depth; if (tcm_qla2xxx_parse_wwn(name, &wwnn, 1) < 0) - return ERR_PTR(-EINVAL); - - se_nacl_new = tcm_qla2xxx_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); -/* #warning FIXME: Hardcoded qla2xxx_nexus depth in tcm_qla2xxx_make_nodeacl */ - qla2xxx_nexus_depth = 1; + return -EINVAL; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, qla2xxx_nexus_depth); - if (IS_ERR(se_nacl)) { - tcm_qla2xxx_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - /* - * Locate our struct tcm_qla2xxx_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); nacl->nport_wwnn = wwnn; tcm_qla2xxx_format_wwn(&nacl->nport_name[0], TCM_QLA2XXX_NAMELEN, wwnn); - return se_nacl; -} - -static void tcm_qla2xxx_drop_nodeacl(struct se_node_acl *se_acl) -{ - struct se_portal_group *se_tpg = se_acl->se_tpg; - struct tcm_qla2xxx_nacl *nacl = container_of(se_acl, - struct tcm_qla2xxx_nacl, se_node_acl); - - core_tpg_del_initiator_node_acl(se_tpg, se_acl, 1); - kfree(nacl); + return 0; } /* Start items for tcm_qla2xxx_tpg_attrib_cit */ @@ -1175,8 +1002,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; @@ -1295,8 +1121,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( tpg->tpg_attrib.cache_dynamic_acls = 1; tpg->tpg_attrib.demo_mode_login_only = 1; - ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { kfree(tpg); return NULL; @@ -1988,14 +1813,10 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = { static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .module = THIS_MODULE, .name = "qla2xxx", + .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_get_fabric_name, - .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_default_depth = tcm_qla2xxx_get_default_depth, - .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, .tpg_check_demo_mode_cache = tcm_qla2xxx_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = @@ -2004,12 +1825,9 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { tcm_qla2xxx_check_prod_write_protect, .tpg_check_prot_fabric_only = tcm_qla2xxx_check_prot_fabric_only, .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, - .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, @@ -2017,7 +1835,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .write_pending = tcm_qla2xxx_write_pending, .write_pending_status = tcm_qla2xxx_write_pending_status, .set_default_node_attributes = tcm_qla2xxx_set_default_node_attrs, - .get_task_tag = tcm_qla2xxx_get_task_tag, .get_cmd_state = tcm_qla2xxx_get_cmd_state, .queue_data_in = tcm_qla2xxx_queue_data_in, .queue_status = tcm_qla2xxx_queue_status, @@ -2031,12 +1848,7 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { .fabric_drop_wwn = tcm_qla2xxx_drop_lport, .fabric_make_tpg = tcm_qla2xxx_make_tpg, .fabric_drop_tpg = tcm_qla2xxx_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = tcm_qla2xxx_make_nodeacl, - .fabric_drop_nodeacl = tcm_qla2xxx_drop_nodeacl, + .fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl, .tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs, .tfc_tpg_base_attrs = tcm_qla2xxx_tpg_attrs, @@ -2046,26 +1858,19 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = { static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .module = THIS_MODULE, .name = "qla2xxx_npiv", + .node_acl_size = sizeof(struct tcm_qla2xxx_nacl), .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, - .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, - .tpg_get_default_depth = tcm_qla2xxx_get_default_depth, - .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_qla2xxx_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_qla2xxx_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_qla2xxx_check_demo_mode, .tpg_check_demo_mode_cache = tcm_qla2xxx_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_demo_mode, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_prod_write_protect, .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, - .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, .check_stop_free = tcm_qla2xxx_check_stop_free, .release_cmd = tcm_qla2xxx_release_cmd, - .put_session = tcm_qla2xxx_put_session, .shutdown_session = tcm_qla2xxx_shutdown_session, .close_session = tcm_qla2xxx_close_session, .sess_get_index = tcm_qla2xxx_sess_get_index, @@ -2073,7 +1878,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .write_pending = tcm_qla2xxx_write_pending, .write_pending_status = tcm_qla2xxx_write_pending_status, .set_default_node_attributes = tcm_qla2xxx_set_default_node_attrs, - .get_task_tag = tcm_qla2xxx_get_task_tag, .get_cmd_state = tcm_qla2xxx_get_cmd_state, .queue_data_in = tcm_qla2xxx_queue_data_in, .queue_status = tcm_qla2xxx_queue_status, @@ -2087,12 +1891,7 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .fabric_drop_wwn = tcm_qla2xxx_npiv_drop_lport, .fabric_make_tpg = tcm_qla2xxx_npiv_make_tpg, .fabric_drop_tpg = tcm_qla2xxx_drop_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = tcm_qla2xxx_make_nodeacl, - .fabric_drop_nodeacl = tcm_qla2xxx_drop_nodeacl, + .fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl, .tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs, .tfc_tpg_base_attrs = tcm_qla2xxx_npiv_tpg_attrs, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 23295115c9fc..3bbf4cb6fd97 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -13,6 +13,8 @@ #include "qla_target.h" struct tcm_qla2xxx_nacl { + struct se_node_acl se_node_acl; + /* From libfc struct fc_rport->port_id */ u32 nport_id; /* Binary World Wide unique Node Name for remote FC Initiator Nport */ @@ -23,8 +25,6 @@ struct tcm_qla2xxx_nacl { struct qla_tgt_sess *qla_tgt_sess; /* Pointer to TCM FC nexus */ struct se_session *nport_nexus; - /* Returned by tcm_qla2xxx_make_nodeacl() */ - struct se_node_acl se_node_acl; }; struct tcm_qla2xxx_tpg_attrib { @@ -57,8 +57,6 @@ struct tcm_qla2xxx_fc_loopid { }; struct tcm_qla2xxx_lport { - /* SCSI protocol the lport is providing */ - u8 lport_proto_id; /* Binary World Wide unique Port Name for FC Target Lport */ u64 lport_wwpn; /* Binary World Wide unique Port Name for FC NPIV Target Lport */ diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c index b562af816c0a..b04b05a0904e 100644 --- a/drivers/soc/qcom/spm.c +++ b/drivers/soc/qcom/spm.c @@ -260,7 +260,7 @@ static int __init qcom_cpuidle_init(struct device_node *cpu_node, int cpu) /* We have atleast one power down mode */ cpumask_clear(&mask); cpumask_set_cpu(cpu, &mask); - qcom_scm_set_warm_boot_addr(cpu_resume, &mask); + qcom_scm_set_warm_boot_addr(cpu_resume_arm, &mask); } per_cpu(qcom_idle_ops, cpu) = fns; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index a3fba366cebe..4e68b62193ed 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -29,7 +29,6 @@ #include <scsi/scsi_tcq.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_parameters.h" @@ -716,7 +715,7 @@ static int iscsit_add_reject_from_cmd( */ if (cmd->se_cmd.se_tfo != NULL) { pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } return -1; } @@ -1002,13 +1001,15 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); if (cmd->sense_reason) goto attach_cmd; + /* only used for printks or comparing with ->ref_task_tag */ + cmd->se_cmd.tag = (__force u32)cmd->init_task_tag; cmd->sense_reason = target_setup_cmd_from_cdb(&cmd->se_cmd, hdr->cdb); if (cmd->sense_reason) { if (cmd->sense_reason == TCM_OUT_OF_RESOURCES) { @@ -1068,7 +1069,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) return -1; else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } } @@ -1084,7 +1085,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (!cmd->sense_reason) return 0; - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return 0; } @@ -1115,7 +1116,6 @@ static int iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, bool dump_payload) { - struct iscsi_conn *conn = cmd->conn; int cmdsn_ret = 0, immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. @@ -1142,7 +1142,7 @@ after_immediate_data: rc = iscsit_dump_data_payload(cmd->conn, cmd->first_burst_len, 1); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); return rc; } else if (cmd->unsolicited_data) iscsit_set_unsoliticed_dataout(cmd); @@ -1811,7 +1811,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true); + target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; switch (function) { @@ -1953,7 +1953,7 @@ attach: */ if (sess_ref) { pr_debug("Handle TMR, using sess_ref=true check\n"); - target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); + target_put_sess_cmd(&cmd->se_cmd); } iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); @@ -2737,11 +2737,7 @@ static int iscsit_send_datain(struct iscsi_cmd *cmd, struct iscsi_conn *conn) cmd->iov_data_count = iov_count; cmd->tx_size = tx_size; - /* sendpage is preferred but can't insert markers */ - if (!conn->conn_ops->IFMarker) - ret = iscsit_fe_sendpage_sg(cmd, conn); - else - ret = iscsit_send_tx_data(cmd, conn, 0); + ret = iscsit_fe_sendpage_sg(cmd, conn); iscsit_unmap_iovec(cmd); @@ -4073,17 +4069,9 @@ static int iscsi_target_rx_opcode(struct iscsi_conn *conn, unsigned char *buf) " opcode while ERL=0, closing iSCSI connection.\n"); return -1; } - if (!conn->conn_ops->OFMarker) { - pr_err("Unable to recover from unknown" - " opcode while OFMarker=No, closing iSCSI" - " connection.\n"); - return -1; - } - if (iscsit_recover_from_unknown_opcode(conn) < 0) { - pr_err("Unable to recover from unknown" - " opcode, closing iSCSI connection.\n"); - return -1; - } + pr_err("Unable to recover from unknown opcode while OFMarker=No," + " closing iSCSI connection.\n"); + ret = -1; break; } diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 469fce44ebad..c1898c84b3d2 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -24,7 +24,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include <target/iscsi/iscsi_transport.h> @@ -860,57 +859,19 @@ static struct configfs_attribute *lio_target_initiator_attrs[] = { NULL, }; -static struct se_node_acl *lio_tpg_alloc_fabric_acl( - struct se_portal_group *se_tpg) +static int lio_target_init_nodeacl(struct se_node_acl *se_nacl, + const char *name) { - struct iscsi_node_acl *acl; - - acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL); - if (!acl) { - pr_err("Unable to allocate memory for struct iscsi_node_acl\n"); - return NULL; - } - - return &acl->se_node_acl; -} - -static struct se_node_acl *lio_target_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) -{ - struct config_group *stats_cg; - struct iscsi_node_acl *acl; - struct se_node_acl *se_nacl_new, *se_nacl; - struct iscsi_portal_group *tpg = container_of(se_tpg, - struct iscsi_portal_group, tpg_se_tpg); - u32 cmdsn_depth; - - se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - cmdsn_depth = tpg->tpg_attrib.default_cmdsn_depth; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NdoeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, cmdsn_depth); - if (IS_ERR(se_nacl)) - return se_nacl; - - acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); - stats_cg = &se_nacl->acl_fabric_stat_group; + struct iscsi_node_acl *acl = + container_of(se_nacl, struct iscsi_node_acl, se_node_acl); + struct config_group *stats_cg = &se_nacl->acl_fabric_stat_group; stats_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, GFP_KERNEL); if (!stats_cg->default_groups) { pr_err("Unable to allocate memory for" " stats_cg->default_groups\n"); - core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); - kfree(acl); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } stats_cg->default_groups[0] = &acl->node_stat_grps.iscsi_sess_stats_group; @@ -918,13 +879,11 @@ static struct se_node_acl *lio_target_make_nodeacl( config_group_init_type_name(&acl->node_stat_grps.iscsi_sess_stats_group, "iscsi_sess_stats", &iscsi_stat_sess_cit); - return se_nacl; + return 0; } -static void lio_target_drop_nodeacl( - struct se_node_acl *se_nacl) +static void lio_target_cleanup_nodeacl( struct se_node_acl *se_nacl) { - struct se_portal_group *se_tpg = se_nacl->se_tpg; struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl); struct config_item *df_item; @@ -938,9 +897,6 @@ static void lio_target_drop_nodeacl( config_item_put(df_item); } kfree(stats_cg->default_groups); - - core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); - kfree(acl); } /* End items for lio_target_acl_cit */ @@ -1463,8 +1419,7 @@ static struct se_portal_group *lio_target_tiqn_addtpg( if (!tpg) return NULL; - ret = core_tpg_register(&iscsi_ops, wwn, &tpg->tpg_se_tpg, - tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI); if (ret < 0) return NULL; @@ -1735,14 +1690,6 @@ static char *iscsi_get_fabric_name(void) return "iSCSI"; } -static u32 iscsi_get_task_tag(struct se_cmd *se_cmd) -{ - struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); - - /* only used for printks or comparism with ->ref_task_tag */ - return (__force u32)cmd->init_task_tag; -} - static int iscsi_get_cmd_state(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); @@ -1832,78 +1779,58 @@ static void lio_aborted_task(struct se_cmd *se_cmd) cmd->conn->conn_transport->iscsit_aborted_task(cmd->conn, cmd); } -static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) +static inline struct iscsi_portal_group *iscsi_tpg(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + return container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); +} - return &tpg->tpg_tiqn->tiqn[0]; +static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) +{ + return iscsi_tpg(se_tpg)->tpg_tiqn->tiqn; } static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpgt; + return iscsi_tpg(se_tpg)->tpgt; } static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.default_cmdsn_depth; + return iscsi_tpg(se_tpg)->tpg_attrib.default_cmdsn_depth; } static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.generate_node_acls; + return iscsi_tpg(se_tpg)->tpg_attrib.generate_node_acls; } static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.cache_dynamic_acls; + return iscsi_tpg(se_tpg)->tpg_attrib.cache_dynamic_acls; } static int lio_tpg_check_demo_mode_write_protect( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.demo_mode_write_protect; + return iscsi_tpg(se_tpg)->tpg_attrib.demo_mode_write_protect; } static int lio_tpg_check_prod_mode_write_protect( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_attrib.prod_mode_write_protect; + return iscsi_tpg(se_tpg)->tpg_attrib.prod_mode_write_protect; } static int lio_tpg_check_prot_fabric_only( struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; /* * Only report fabric_prot_type if t10_pi has also been enabled * for incoming ib_isert sessions. */ - if (!tpg->tpg_attrib.t10_pi) + if (!iscsi_tpg(se_tpg)->tpg_attrib.t10_pi) return 0; - - return tpg->tpg_attrib.fabric_prot_type; -} - -static void lio_tpg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_acl) -{ - struct iscsi_node_acl *acl = container_of(se_acl, - struct iscsi_node_acl, se_node_acl); - kfree(acl); + return iscsi_tpg(se_tpg)->tpg_attrib.fabric_prot_type; } /* @@ -1948,9 +1875,7 @@ static void lio_tpg_close_session(struct se_session *se_sess) static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) { - struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->tpg_tiqn->tiqn_index; + return iscsi_tpg(se_tpg)->tpg_tiqn->tiqn_index; } static void lio_set_default_node_attributes(struct se_node_acl *se_acl) @@ -1967,7 +1892,7 @@ static void lio_set_default_node_attributes(struct se_node_acl *se_acl) static int lio_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void lio_release_cmd(struct se_cmd *se_cmd) @@ -1981,14 +1906,11 @@ static void lio_release_cmd(struct se_cmd *se_cmd) const struct target_core_fabric_ops iscsi_ops = { .module = THIS_MODULE, .name = "iscsi", + .node_acl_size = sizeof(struct iscsi_node_acl), .get_fabric_name = iscsi_get_fabric_name, - .get_fabric_proto_ident = iscsi_get_fabric_proto_ident, .tpg_get_wwn = lio_tpg_get_endpoint_wwn, .tpg_get_tag = lio_tpg_get_tag, .tpg_get_default_depth = lio_tpg_get_default_depth, - .tpg_get_pr_transport_id = iscsi_get_pr_transport_id, - .tpg_get_pr_transport_id_len = iscsi_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = iscsi_parse_pr_out_transport_id, .tpg_check_demo_mode = lio_tpg_check_demo_mode, .tpg_check_demo_mode_cache = lio_tpg_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = @@ -1996,8 +1918,6 @@ const struct target_core_fabric_ops iscsi_ops = { .tpg_check_prod_mode_write_protect = lio_tpg_check_prod_mode_write_protect, .tpg_check_prot_fabric_only = &lio_tpg_check_prot_fabric_only, - .tpg_alloc_fabric_acl = lio_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = lio_tpg_release_fabric_acl, .tpg_get_inst_index = lio_tpg_get_inst_index, .check_stop_free = lio_check_stop_free, .release_cmd = lio_release_cmd, @@ -2008,7 +1928,6 @@ const struct target_core_fabric_ops iscsi_ops = { .write_pending = lio_write_pending, .write_pending_status = lio_write_pending_status, .set_default_node_attributes = lio_set_default_node_attributes, - .get_task_tag = iscsi_get_task_tag, .get_cmd_state = iscsi_get_cmd_state, .queue_data_in = lio_queue_data_in, .queue_status = lio_queue_status, @@ -2020,8 +1939,8 @@ const struct target_core_fabric_ops iscsi_ops = { .fabric_drop_tpg = lio_target_tiqn_deltpg, .fabric_make_np = lio_target_call_addnptotpg, .fabric_drop_np = lio_target_call_delnpfromtpg, - .fabric_make_nodeacl = lio_target_make_nodeacl, - .fabric_drop_nodeacl = lio_target_drop_nodeacl, + .fabric_init_nodeacl = lio_target_init_nodeacl, + .fabric_cleanup_nodeacl = lio_target_cleanup_nodeacl, .tfc_discovery_attrs = lio_target_discovery_auth_attrs, .tfc_wwn_attrs = lio_target_wwn_attrs, diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 959a14c9dd5d..210f6e4830e3 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -956,56 +956,3 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) iscsit_handle_connection_cleanup(conn); } - -/* - * This is the simple function that makes the magic of - * sync and steering happen in the follow paradoxical order: - * - * 0) Receive conn->of_marker (bytes left until next OFMarker) - * bytes into an offload buffer. When we pass the exact number - * of bytes in conn->of_marker, iscsit_dump_data_payload() and hence - * rx_data() will automatically receive the identical u32 marker - * values and store it in conn->of_marker_offset; - * 1) Now conn->of_marker_offset will contain the offset to the start - * of the next iSCSI PDU. Dump these remaining bytes into another - * offload buffer. - * 2) We are done! - * Next byte in the TCP stream will contain the next iSCSI PDU! - * Cool Huh?! - */ -int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn) -{ - /* - * Make sure the remaining bytes to next maker is a sane value. - */ - if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) { - pr_err("Remaining bytes to OFMarker: %u exceeds" - " OFMarkInt bytes: %u.\n", conn->of_marker, - conn->conn_ops->OFMarkInt * 4); - return -1; - } - - pr_debug("Advancing %u bytes in TCP stream to get to the" - " next OFMarker.\n", conn->of_marker); - - if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0) - return -1; - - /* - * Make sure the offset marker we retrived is a valid value. - */ - if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) + - conn->conn_ops->MaxRecvDataSegmentLength)) { - pr_err("OfMarker offset value: %u exceeds limit.\n", - conn->of_marker_offset); - return -1; - } - - pr_debug("Discarding %u bytes of TCP stream to get to the" - " next iSCSI Opcode.\n", conn->of_marker_offset); - - if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0) - return -1; - - return 0; -} diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index 21acc9a06376..a9e2f9497fb2 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -10,6 +10,5 @@ extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); -extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 70d799dfab03..3d0fe4ff5590 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -410,8 +410,6 @@ static int iscsi_login_zero_tsih_s2( if (iscsi_change_param_sprintf(conn, "ErrorRecoveryLevel=%d", na->default_erl)) return -1; - if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0) - return -1; /* * Set RDMAExtensions=Yes by default for iSER enabled network portals */ @@ -477,59 +475,6 @@ check_prot: return 0; } -/* - * Remove PSTATE_NEGOTIATE for the four FIM related keys. - * The Initiator node will be able to enable FIM by proposing them itself. - */ -int iscsi_login_disable_FIM_keys( - struct iscsi_param_list *param_list, - struct iscsi_conn *conn) -{ - struct iscsi_param *param; - - param = iscsi_find_param_from_key("OFMarker", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " OFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("OFMarkInt", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("IFMarker", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - param = iscsi_find_param_from_key("IFMarkInt", param_list); - if (!param) { - pr_err("iscsi_find_param_from_key() for" - " IFMarker failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - return -1; - } - param->state &= ~PSTATE_NEGOTIATE; - - return 0; -} - static int iscsi_login_non_zero_tsih_s1( struct iscsi_conn *conn, unsigned char *buf) @@ -616,7 +561,7 @@ static int iscsi_login_non_zero_tsih_s2( if (iscsi_change_param_sprintf(conn, "TargetPortalGroupTag=%hu", sess->tpg->tpgt)) return -1; - return iscsi_login_disable_FIM_keys(conn->param_list, conn); + return 0; } int iscsi_login_post_auth_non_zero_tsih( @@ -765,7 +710,6 @@ int iscsi_post_login_handler( conn->conn_state = TARG_CONN_STATE_LOGGED_IN; iscsi_set_connection_parameters(conn->conn_ops, conn->param_list); - iscsit_set_sync_and_steering_values(conn); /* * SCSI Initiator -> SCSI Target Port Mapping */ diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 29d098324b7f..1c7358081533 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -16,6 +16,5 @@ extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, bool, bool); extern int iscsi_target_login_thread(void *); -extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); #endif /*** ISCSI_TARGET_LOGIN_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index d4f9e9645697..e8a52f7d6204 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -34,13 +34,6 @@ int iscsi_login_rx_data( iov.iov_len = length; iov.iov_base = buf; - /* - * Initial Marker-less Interval. - * Add the values regardless of IFMarker/OFMarker, considering - * it may not be negoitated yet. - */ - conn->of_marker += length; - rx_got = rx_data(conn, &iov, 1, length); if (rx_got != length) { pr_err("rx_data returned %d, expecting %d.\n", @@ -72,13 +65,6 @@ int iscsi_login_tx_data( iov_cnt++; } - /* - * Initial Marker-less Interval. - * Add the values regardless of IFMarker/OFMarker, considering - * it may not be negoitated yet. - */ - conn->if_marker += length; - tx_sent = tx_data(conn, &iov[0], iov_cnt, length); if (tx_sent != length) { pr_err("tx_data returned %d, expecting %d.\n", @@ -97,12 +83,6 @@ void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops) "CRC32C" : "None"); pr_debug("MaxRecvDataSegmentLength: %u\n", conn_ops->MaxRecvDataSegmentLength); - pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No"); - pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No"); - if (conn_ops->OFMarker) - pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt); - if (conn_ops->IFMarker) - pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt); } void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops) @@ -194,10 +174,6 @@ static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *para case TYPERANGE_DIGEST: param->type = TYPE_VALUE_LIST | TYPE_STRING; break; - case TYPERANGE_MARKINT: - param->type = TYPE_NUMBER_RANGE; - param->type_range |= TYPERANGE_1_TO_65535; - break; case TYPERANGE_ISCSINAME: case TYPERANGE_SESSIONTYPE: case TYPERANGE_TARGETADDRESS: @@ -422,13 +398,13 @@ int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr) param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT, PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, - TYPERANGE_MARKINT, USE_INITIAL_ONLY); + TYPERANGE_UTF8, USE_INITIAL_ONLY); if (!param) goto out; param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT, PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, - TYPERANGE_MARKINT, USE_INITIAL_ONLY); + TYPERANGE_UTF8, USE_INITIAL_ONLY); if (!param) goto out; /* @@ -524,9 +500,9 @@ int iscsi_set_keys_to_negotiate( } else if (!strcmp(param->name, OFMARKER)) { SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, IFMARKINT)) { - SET_PSTATE_NEGOTIATE(param); + SET_PSTATE_REJECT(param); } else if (!strcmp(param->name, OFMARKINT)) { - SET_PSTATE_NEGOTIATE(param); + SET_PSTATE_REJECT(param); } else if (!strcmp(param->name, RDMAEXTENSIONS)) { if (iser) SET_PSTATE_NEGOTIATE(param); @@ -906,91 +882,6 @@ static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_pt return 0; } -static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value) -{ - char *left_val_ptr = NULL, *right_val_ptr = NULL; - char *tilde_ptr = NULL; - u32 left_val, right_val, local_left_val; - - if (strcmp(param->name, IFMARKINT) && - strcmp(param->name, OFMARKINT)) { - pr_err("Only parameters \"%s\" or \"%s\" may contain a" - " numerical range value.\n", IFMARKINT, OFMARKINT); - return -1; - } - - if (IS_PSTATE_PROPOSER(param)) - return 0; - - tilde_ptr = strchr(value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range indicator" - " \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = value; - right_val_ptr = value + strlen(left_val_ptr) + 1; - - if (iscsi_check_numerical_value(param, left_val_ptr) < 0) - return -1; - if (iscsi_check_numerical_value(param, right_val_ptr) < 0) - return -1; - - left_val = simple_strtoul(left_val_ptr, NULL, 0); - right_val = simple_strtoul(right_val_ptr, NULL, 0); - *tilde_ptr = '~'; - - if (right_val < left_val) { - pr_err("Numerical range for parameter \"%s\" contains" - " a right value which is less than the left.\n", - param->name); - return -1; - } - - /* - * For now, enforce reasonable defaults for [I,O]FMarkInt. - */ - tilde_ptr = strchr(param->value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range indicator" - " \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = param->value; - right_val_ptr = param->value + strlen(left_val_ptr) + 1; - - local_left_val = simple_strtoul(left_val_ptr, NULL, 0); - *tilde_ptr = '~'; - - if (param->set_param) { - if ((left_val < local_left_val) || - (right_val < local_left_val)) { - pr_err("Passed value range \"%u~%u\" is below" - " minimum left value \"%u\" for key \"%s\"," - " rejecting.\n", left_val, right_val, - local_left_val, param->name); - return -1; - } - } else { - if ((left_val < local_left_val) && - (right_val < local_left_val)) { - pr_err("Received value range \"%u~%u\" is" - " below minimum left value \"%u\" for key" - " \"%s\", rejecting.\n", left_val, right_val, - local_left_val, param->name); - SET_PSTATE_REJECT(param); - if (iscsi_update_param_value(param, REJECT) < 0) - return -1; - } - } - - return 0; -} - static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value) { if (IS_PSTATE_PROPOSER(param)) @@ -1027,33 +918,6 @@ static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *val return 0; } -/* - * This function is used to pick a value range number, currently just - * returns the lesser of both right values. - */ -static char *iscsi_get_value_from_number_range( - struct iscsi_param *param, - char *value) -{ - char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL; - u32 acceptor_right_value, proposer_right_value; - - tilde_ptr1 = strchr(value, '~'); - if (!tilde_ptr1) - return NULL; - *tilde_ptr1++ = '\0'; - proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0); - - tilde_ptr2 = strchr(param->value, '~'); - if (!tilde_ptr2) - return NULL; - *tilde_ptr2++ = '\0'; - acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0); - - return (acceptor_right_value >= proposer_right_value) ? - tilde_ptr1 : tilde_ptr2; -} - static char *iscsi_check_valuelist_for_support( struct iscsi_param *param, char *value) @@ -1103,7 +967,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, struct iscsi_conn *conn) { u8 acceptor_boolean_value = 0, proposer_boolean_value = 0; - char *negoitated_value = NULL; + char *negotiated_value = NULL; if (IS_PSTATE_ACCEPTOR(param)) { pr_err("Received key \"%s\" twice, protocol error.\n", @@ -1203,24 +1067,16 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, pr_debug("Updated %s to target MXDSL value: %s\n", param->name, param->value); } - - } else if (IS_TYPE_NUMBER_RANGE(param)) { - negoitated_value = iscsi_get_value_from_number_range( - param, value); - if (!negoitated_value) - return -1; - if (iscsi_update_param_value(param, negoitated_value) < 0) - return -1; } else if (IS_TYPE_VALUE_LIST(param)) { - negoitated_value = iscsi_check_valuelist_for_support( + negotiated_value = iscsi_check_valuelist_for_support( param, value); - if (!negoitated_value) { + if (!negotiated_value) { pr_err("Proposer's value list \"%s\" contains" " no valid values from Acceptor's value list" " \"%s\".\n", value, param->value); return -1; } - if (iscsi_update_param_value(param, negoitated_value) < 0) + if (iscsi_update_param_value(param, negotiated_value) < 0) return -1; } else if (IS_PHASE_DECLARATIVE(param)) { if (iscsi_update_param_value(param, value) < 0) @@ -1239,47 +1095,7 @@ static int iscsi_check_proposer_state(struct iscsi_param *param, char *value) return -1; } - if (IS_TYPE_NUMBER_RANGE(param)) { - u32 left_val = 0, right_val = 0, recieved_value = 0; - char *left_val_ptr = NULL, *right_val_ptr = NULL; - char *tilde_ptr = NULL; - - if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) { - if (iscsi_update_param_value(param, value) < 0) - return -1; - return 0; - } - - tilde_ptr = strchr(value, '~'); - if (tilde_ptr) { - pr_err("Illegal \"~\" in response for \"%s\".\n", - param->name); - return -1; - } - tilde_ptr = strchr(param->value, '~'); - if (!tilde_ptr) { - pr_err("Unable to locate numerical range" - " indicator \"~\" for \"%s\".\n", param->name); - return -1; - } - *tilde_ptr = '\0'; - - left_val_ptr = param->value; - right_val_ptr = param->value + strlen(left_val_ptr) + 1; - left_val = simple_strtoul(left_val_ptr, NULL, 0); - right_val = simple_strtoul(right_val_ptr, NULL, 0); - recieved_value = simple_strtoul(value, NULL, 0); - - *tilde_ptr = '~'; - - if ((recieved_value < left_val) || - (recieved_value > right_val)) { - pr_err("Illegal response \"%s=%u\", value must" - " be between %u and %u.\n", param->name, - recieved_value, left_val, right_val); - return -1; - } - } else if (IS_TYPE_VALUE_LIST(param)) { + if (IS_TYPE_VALUE_LIST(param)) { char *comma_ptr = NULL, *tmp_ptr = NULL; comma_ptr = strchr(value, ','); @@ -1361,9 +1177,6 @@ static int iscsi_check_value(struct iscsi_param *param, char *value) } else if (IS_TYPE_NUMBER(param)) { if (iscsi_check_numerical_value(param, value) < 0) return -1; - } else if (IS_TYPE_NUMBER_RANGE(param)) { - if (iscsi_check_numerical_range_value(param, value) < 0) - return -1; } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) { if (iscsi_check_string_or_list_value(param, value) < 0) return -1; @@ -1483,8 +1296,6 @@ static int iscsi_enforce_integrity_rules( char *tmpptr; u8 DataSequenceInOrder = 0; u8 ErrorRecoveryLevel = 0, SessionType = 0; - u8 IFMarker = 0, OFMarker = 0; - u8 IFMarkInt_Reject = 1, OFMarkInt_Reject = 1; u32 FirstBurstLength = 0, MaxBurstLength = 0; struct iscsi_param *param = NULL; @@ -1503,28 +1314,12 @@ static int iscsi_enforce_integrity_rules( if (!strcmp(param->name, MAXBURSTLENGTH)) MaxBurstLength = simple_strtoul(param->value, &tmpptr, 0); - if (!strcmp(param->name, IFMARKER)) - if (!strcmp(param->value, YES)) - IFMarker = 1; - if (!strcmp(param->name, OFMARKER)) - if (!strcmp(param->value, YES)) - OFMarker = 1; - if (!strcmp(param->name, IFMARKINT)) - if (!strcmp(param->value, REJECT)) - IFMarkInt_Reject = 1; - if (!strcmp(param->name, OFMARKINT)) - if (!strcmp(param->value, REJECT)) - OFMarkInt_Reject = 1; } list_for_each_entry(param, ¶m_list->param_list, p_list) { if (!(param->phase & phase)) continue; - if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) && - (strcmp(param->name, IFMARKER) && - strcmp(param->name, OFMARKER) && - strcmp(param->name, IFMARKINT) && - strcmp(param->name, OFMARKINT)))) + if (!SessionType && !IS_PSTATE_ACCEPTOR(param)) continue; if (!strcmp(param->name, MAXOUTSTANDINGR2T) && DataSequenceInOrder && (ErrorRecoveryLevel > 0)) { @@ -1556,38 +1351,6 @@ static int iscsi_enforce_integrity_rules( param->name, param->value); } } - if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) { - if (iscsi_update_param_value(param, NO) < 0) - return -1; - IFMarker = 0; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) { - if (iscsi_update_param_value(param, NO) < 0) - return -1; - OFMarker = 0; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, IFMARKINT) && !IFMarker) { - if (!strcmp(param->value, REJECT)) - continue; - param->state &= ~PSTATE_NEGOTIATE; - if (iscsi_update_param_value(param, IRRELEVANT) < 0) - return -1; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } - if (!strcmp(param->name, OFMARKINT) && !OFMarker) { - if (!strcmp(param->value, REJECT)) - continue; - param->state &= ~PSTATE_NEGOTIATE; - if (iscsi_update_param_value(param, IRRELEVANT) < 0) - return -1; - pr_debug("Reset \"%s\" to \"%s\".\n", - param->name, param->value); - } } return 0; @@ -1824,24 +1587,6 @@ void iscsi_set_connection_parameters( */ pr_debug("MaxRecvDataSegmentLength: %u\n", ops->MaxRecvDataSegmentLength); - } else if (!strcmp(param->name, OFMARKER)) { - ops->OFMarker = !strcmp(param->value, YES); - pr_debug("OFMarker: %s\n", - param->value); - } else if (!strcmp(param->name, IFMARKER)) { - ops->IFMarker = !strcmp(param->value, YES); - pr_debug("IFMarker: %s\n", - param->value); - } else if (!strcmp(param->name, OFMARKINT)) { - ops->OFMarkInt = - simple_strtoul(param->value, &tmpptr, 0); - pr_debug("OFMarkInt: %s\n", - param->value); - } else if (!strcmp(param->name, IFMARKINT)) { - ops->IFMarkInt = - simple_strtoul(param->value, &tmpptr, 0); - pr_debug("IFMarkInt: %s\n", - param->value); } else if (!strcmp(param->name, INITIATORRECVDATASEGMENTLENGTH)) { ops->InitiatorRecvDataSegmentLength = simple_strtoul(param->value, &tmpptr, 0); diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h index a47046a752aa..a0751e3f0813 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.h +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -138,8 +138,8 @@ extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, #define INITIAL_SESSIONTYPE NORMAL #define INITIAL_IFMARKER NO #define INITIAL_OFMARKER NO -#define INITIAL_IFMARKINT "2048~65535" -#define INITIAL_OFMARKINT "2048~65535" +#define INITIAL_IFMARKINT REJECT +#define INITIAL_OFMARKINT REJECT /* * Initial values for iSER parameters following RFC-5046 Section 6 @@ -239,10 +239,9 @@ extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, #define TYPERANGE_AUTH 0x0200 #define TYPERANGE_DIGEST 0x0400 #define TYPERANGE_ISCSINAME 0x0800 -#define TYPERANGE_MARKINT 0x1000 -#define TYPERANGE_SESSIONTYPE 0x2000 -#define TYPERANGE_TARGETADDRESS 0x4000 -#define TYPERANGE_UTF8 0x8000 +#define TYPERANGE_SESSIONTYPE 0x1000 +#define TYPERANGE_TARGETADDRESS 0x2000 +#define TYPERANGE_UTF8 0x4000 #define IS_TYPERANGE_0_TO_2(p) ((p)->type_range & TYPERANGE_0_TO_2) #define IS_TYPERANGE_0_TO_3600(p) ((p)->type_range & TYPERANGE_0_TO_3600) diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index fe9a582ca6af..cf59c397007b 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -120,7 +120,7 @@ u8 iscsit_tmr_task_reassign( struct iscsi_tmr_req *tmr_req = cmd->tmr_req; struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; struct iscsi_tm *hdr = (struct iscsi_tm *) buf; - int ret, ref_lun; + u64 ret, ref_lun; pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x," " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n", @@ -164,7 +164,7 @@ u8 iscsit_tmr_task_reassign( ref_lun = scsilun_to_int(&hdr->lun); if (ref_lun != ref_cmd->se_cmd.orig_fe_lun) { pr_err("Unable to perform connection recovery for" - " differing ref_lun: %d ref_cmd orig_fe_lun: %u\n", + " differing ref_lun: %llu ref_cmd orig_fe_lun: %llu\n", ref_lun, ref_cmd->se_cmd.orig_fe_lun); return ISCSI_TMF_RSP_REJECTED; } diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 5e3295fe404d..968068ffcb1c 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -18,7 +18,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/iscsi/iscsi_target_core.h> #include "iscsi_target_erl0.h" @@ -67,9 +66,12 @@ int iscsit_load_discovery_tpg(void) pr_err("Unable to allocate struct iscsi_portal_group\n"); return -1; } - - ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg, - tpg, TRANSPORT_TPG_TYPE_DISCOVERY); + /* + * Save iscsi_ops pointer for special case discovery TPG that + * doesn't exist as se_wwn->wwn_group within configfs. + */ + tpg->tpg_se_tpg.se_tpg_tfo = &iscsi_ops; + ret = core_tpg_register(NULL, &tpg->tpg_se_tpg, -1); if (ret < 0) { kfree(tpg); return -1; @@ -280,8 +282,6 @@ int iscsit_tpg_del_portal_group( return -EPERM; } - core_tpg_clear_object_luns(&tpg->tpg_se_tpg); - if (tpg->param_list) { iscsi_release_param_list(tpg->param_list); tpg->param_list = NULL; diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b18edda3e8af..a2bff0702eb2 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -22,7 +22,6 @@ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/iscsi/iscsi_transport.h> #include <target/iscsi/iscsi_target_core.h> @@ -746,7 +745,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; case ISCSI_OP_REJECT: @@ -762,7 +761,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); if (!rc && shutdown && se_cmd->se_sess) { __iscsit_free_cmd(cmd, true, shutdown); - target_put_sess_cmd(se_cmd->se_sess, se_cmd); + target_put_sess_cmd(se_cmd); } break; } @@ -809,54 +808,6 @@ void iscsit_inc_session_usage_count(struct iscsi_session *sess) spin_unlock_bh(&sess->session_usage_lock); } -/* - * Setup conn->if_marker and conn->of_marker values based upon - * the initial marker-less interval. (see iSCSI v19 A.2) - */ -int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn) -{ - int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0; - /* - * IFMarkInt and OFMarkInt are negotiated as 32-bit words. - */ - u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4); - u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4); - - if (conn->conn_ops->OFMarker) { - /* - * Account for the first Login Command received not - * via iscsi_recv_msg(). - */ - conn->of_marker += ISCSI_HDR_LEN; - if (conn->of_marker <= OFMarkInt) { - conn->of_marker = (OFMarkInt - conn->of_marker); - } else { - login_ofmarker_count = (conn->of_marker / OFMarkInt); - next_marker = (OFMarkInt * (login_ofmarker_count + 1)) + - (login_ofmarker_count * MARKER_SIZE); - conn->of_marker = (next_marker - conn->of_marker); - } - conn->of_marker_offset = 0; - pr_debug("Setting OFMarker value to %u based on Initial" - " Markerless Interval.\n", conn->of_marker); - } - - if (conn->conn_ops->IFMarker) { - if (conn->if_marker <= IFMarkInt) { - conn->if_marker = (IFMarkInt - conn->if_marker); - } else { - login_ifmarker_count = (conn->if_marker / IFMarkInt); - next_marker = (IFMarkInt * (login_ifmarker_count + 1)) + - (login_ifmarker_count * MARKER_SIZE); - conn->if_marker = (next_marker - conn->if_marker); - } - pr_debug("Setting IFMarker value to %u based on Initial" - " Markerless Interval.\n", conn->if_marker); - } - - return 0; -} - struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid) { struct iscsi_conn *conn; diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 1ab754a671ff..995f1cb29d0e 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -34,7 +34,6 @@ extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern int iscsit_check_session_usage_count(struct iscsi_session *); extern void iscsit_dec_session_usage_count(struct iscsi_session *); extern void iscsit_inc_session_usage_count(struct iscsi_session *); -extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *); extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16); extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16); extern void iscsit_check_conn_usage_count(struct iscsi_conn *); diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 51f0c895c6a5..a556bdebd775 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -35,14 +35,11 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include "tcm_loop.h" #define to_tcm_loop_hba(hba) container_of(hba, struct tcm_loop_hba, dev) -static const struct target_core_fabric_ops loop_ops; - static struct workqueue_struct *tcm_loop_workqueue; static struct kmem_cache *tcm_loop_cmd_cache; @@ -165,6 +162,7 @@ static void tcm_loop_submission_work(struct work_struct *work) transfer_length = scsi_bufflen(sc); } + se_cmd->tag = tl_cmd->sc_cmd_tag; rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd, &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun, transfer_length, TCM_SIMPLE_TAG, @@ -217,7 +215,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * to struct scsi_device */ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, - int lun, int task, enum tcm_tmreq_table tmr) + u64 lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; struct se_session *se_sess; @@ -409,7 +407,7 @@ static int tcm_loop_driver_probe(struct device *dev) sh->max_id = 2; sh->max_lun = 0; sh->max_channel = 0; - sh->max_cmd_len = TL_SCSI_MAX_CMD_LEN; + sh->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE; host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION | @@ -520,147 +518,26 @@ static char *tcm_loop_get_fabric_name(void) return "loopback"; } -static u8 tcm_loop_get_fabric_proto_ident(struct se_portal_group *se_tpg) +static inline struct tcm_loop_tpg *tl_tpg(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; - /* - * tl_proto_id is set at tcm_loop_configfs.c:tcm_loop_make_scsi_hba() - * time based on the protocol dependent prefix of the passed configfs group. - * - * Based upon tl_proto_id, TCM_Loop emulates the requested fabric - * ProtocolID using target_core_fabric_lib.c symbols. - */ - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); + return container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg); } static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; /* * Return the passed NAA identifier for the SAS Target Port */ - return &tl_tpg->tl_hba->tl_wwn_address[0]; + return &tl_tpg(se_tpg)->tl_hba->tl_wwn_address[0]; } static u16 tcm_loop_get_tag(struct se_portal_group *se_tpg) { - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; /* * This Tag is used when forming SCSI Name identifier in EVPD=1 0x83 * to represent the SCSI Target Port. */ - return tl_tpg->tl_tpgt; -} - -static u32 tcm_loop_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 tcm_loop_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 tcm_loop_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -static char *tcm_loop_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct tcm_loop_tpg *tl_tpg = se_tpg->se_tpg_fabric_ptr; - struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; - - switch (tl_hba->tl_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tl_proto_id: 0x%02x, using" - " SAS emulation\n", tl_hba->tl_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); + return tl_tpg(se_tpg)->tl_tpgt; } /* @@ -703,30 +580,6 @@ static int tcm_loop_check_prot_fabric_only(struct se_portal_group *se_tpg) return tl_tpg->tl_fabric_prot_type; } -static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl( - struct se_portal_group *se_tpg) -{ - struct tcm_loop_nacl *tl_nacl; - - tl_nacl = kzalloc(sizeof(struct tcm_loop_nacl), GFP_KERNEL); - if (!tl_nacl) { - pr_err("Unable to allocate struct tcm_loop_nacl\n"); - return NULL; - } - - return &tl_nacl->se_node_acl; -} - -static void tcm_loop_tpg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct tcm_loop_nacl *tl_nacl = container_of(se_nacl, - struct tcm_loop_nacl, se_node_acl); - - kfree(tl_nacl); -} - static u32 tcm_loop_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -742,14 +595,6 @@ static void tcm_loop_set_default_node_attributes(struct se_node_acl *se_acl) return; } -static u32 tcm_loop_get_task_tag(struct se_cmd *se_cmd) -{ - struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, - struct tcm_loop_cmd, tl_se_cmd); - - return tl_cmd->sc_cmd_tag; -} - static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd) { struct tcm_loop_cmd *tl_cmd = container_of(se_cmd, @@ -902,7 +747,7 @@ static void tcm_loop_port_unlink( se_lun->unpacked_lun); if (!sd) { pr_err("Unable to locate struct scsi_device for %d:%d:" - "%d\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); + "%llu\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); return; } /* @@ -1234,8 +1079,7 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( /* * Register the tl_tpg as a emulated SAS TCM Target Endpoint */ - ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, tl_tpg, - TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tl_tpg->tl_se_tpg, tl_hba->tl_proto_id); if (ret < 0) return ERR_PTR(-ENOMEM); @@ -1386,13 +1230,8 @@ static const struct target_core_fabric_ops loop_ops = { .module = THIS_MODULE, .name = "loopback", .get_fabric_name = tcm_loop_get_fabric_name, - .get_fabric_proto_ident = tcm_loop_get_fabric_proto_ident, .tpg_get_wwn = tcm_loop_get_endpoint_wwn, .tpg_get_tag = tcm_loop_get_tag, - .tpg_get_default_depth = tcm_loop_get_default_depth, - .tpg_get_pr_transport_id = tcm_loop_get_pr_transport_id, - .tpg_get_pr_transport_id_len = tcm_loop_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = tcm_loop_parse_pr_out_transport_id, .tpg_check_demo_mode = tcm_loop_check_demo_mode, .tpg_check_demo_mode_cache = tcm_loop_check_demo_mode_cache, .tpg_check_demo_mode_write_protect = @@ -1400,8 +1239,6 @@ static const struct target_core_fabric_ops loop_ops = { .tpg_check_prod_mode_write_protect = tcm_loop_check_prod_mode_write_protect, .tpg_check_prot_fabric_only = tcm_loop_check_prot_fabric_only, - .tpg_alloc_fabric_acl = tcm_loop_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = tcm_loop_tpg_release_fabric_acl, .tpg_get_inst_index = tcm_loop_get_inst_index, .check_stop_free = tcm_loop_check_stop_free, .release_cmd = tcm_loop_release_cmd, @@ -1411,7 +1248,6 @@ static const struct target_core_fabric_ops loop_ops = { .write_pending = tcm_loop_write_pending, .write_pending_status = tcm_loop_write_pending_status, .set_default_node_attributes = tcm_loop_set_default_node_attributes, - .get_task_tag = tcm_loop_get_task_tag, .get_cmd_state = tcm_loop_get_cmd_state, .queue_data_in = tcm_loop_queue_data_in, .queue_status = tcm_loop_queue_status, diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 1e72ff77cac9..4346462094a1 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -2,11 +2,6 @@ #define TL_WWN_ADDR_LEN 256 #define TL_TPGS_PER_HBA 32 -/* - * Used in tcm_loop_driver_probe() for struct Scsi_Host->max_cmd_len - */ -#define TL_SCSI_MAX_CMD_LEN 32 - struct tcm_loop_cmd { /* State of Linux/SCSI CDB+Data descriptor */ u32 sc_cmd_state; @@ -33,10 +28,6 @@ struct tcm_loop_nexus { struct se_session *se_sess; }; -struct tcm_loop_nacl { - struct se_node_acl se_node_acl; -}; - #define TCM_TRANSPORT_ONLINE 0 #define TCM_TRANSPORT_OFFLINE 1 diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index ce81f17ad1ba..0edf320fb685 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -36,7 +36,6 @@ #include <target/target_core_backend.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include <asm/unaligned.h> @@ -109,13 +108,13 @@ static struct sbp_session *sbp_session_find_by_guid( } static struct sbp_login_descriptor *sbp_login_find_by_lun( - struct sbp_session *session, struct se_lun *lun) + struct sbp_session *session, u32 unpacked_lun) { struct sbp_login_descriptor *login, *found = NULL; spin_lock_bh(&session->lock); list_for_each_entry(login, &session->login_list, link) { - if (login->lun == lun) + if (login->login_lun == unpacked_lun) found = login; } spin_unlock_bh(&session->lock); @@ -125,7 +124,7 @@ static struct sbp_login_descriptor *sbp_login_find_by_lun( static int sbp_login_count_all_by_lun( struct sbp_tpg *tpg, - struct se_lun *lun, + u32 unpacked_lun, int exclusive) { struct se_session *se_sess; @@ -139,7 +138,7 @@ static int sbp_login_count_all_by_lun( spin_lock_bh(&sess->lock); list_for_each_entry(login, &sess->login_list, link) { - if (login->lun != lun) + if (login->login_lun != unpacked_lun) continue; if (!exclusive || login->exclusive) @@ -175,23 +174,23 @@ static struct sbp_login_descriptor *sbp_login_find_by_id( return found; } -static struct se_lun *sbp_get_lun_from_tpg(struct sbp_tpg *tpg, int lun) +static u32 sbp_get_lun_from_tpg(struct sbp_tpg *tpg, u32 login_lun, int *err) { struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_lun *se_lun; - if (lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return ERR_PTR(-EINVAL); - - spin_lock(&se_tpg->tpg_lun_lock); - se_lun = se_tpg->tpg_lun_list[lun]; - - if (se_lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) - se_lun = ERR_PTR(-ENODEV); - - spin_unlock(&se_tpg->tpg_lun_lock); + rcu_read_lock(); + hlist_for_each_entry_rcu(se_lun, &se_tpg->tpg_lun_hlist, link) { + if (se_lun->unpacked_lun == login_lun) { + rcu_read_unlock(); + *err = 0; + return login_lun; + } + } + rcu_read_unlock(); - return se_lun; + *err = -ENODEV; + return login_lun; } static struct sbp_session *sbp_session_create( @@ -295,17 +294,16 @@ static void sbp_management_request_login( { struct sbp_tport *tport = agent->tport; struct sbp_tpg *tpg = tport->tpg; - struct se_lun *se_lun; - int ret; - u64 guid; struct sbp_session *sess; struct sbp_login_descriptor *login; struct sbp_login_response_block *response; - int login_response_len; + u64 guid; + u32 unpacked_lun; + int login_response_len, ret; - se_lun = sbp_get_lun_from_tpg(tpg, - LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc))); - if (IS_ERR(se_lun)) { + unpacked_lun = sbp_get_lun_from_tpg(tpg, + LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc)), &ret); + if (ret) { pr_notice("login to unknown LUN: %d\n", LOGIN_ORB_LUN(be32_to_cpu(req->orb.misc))); @@ -326,11 +324,11 @@ static void sbp_management_request_login( } pr_notice("mgt_agent LOGIN to LUN %d from %016llx\n", - se_lun->unpacked_lun, guid); + unpacked_lun, guid); sess = sbp_session_find_by_guid(tpg, guid); if (sess) { - login = sbp_login_find_by_lun(sess, se_lun); + login = sbp_login_find_by_lun(sess, unpacked_lun); if (login) { pr_notice("initiator already logged-in\n"); @@ -358,7 +356,7 @@ static void sbp_management_request_login( * reject with access_denied if any logins present */ if (LOGIN_ORB_EXCLUSIVE(be32_to_cpu(req->orb.misc)) && - sbp_login_count_all_by_lun(tpg, se_lun, 0)) { + sbp_login_count_all_by_lun(tpg, unpacked_lun, 0)) { pr_warn("refusing exclusive login with other active logins\n"); req->status.status = cpu_to_be32( @@ -371,7 +369,7 @@ static void sbp_management_request_login( * check exclusive bit in any existing login descriptor * reject with access_denied if any exclusive logins present */ - if (sbp_login_count_all_by_lun(tpg, se_lun, 1)) { + if (sbp_login_count_all_by_lun(tpg, unpacked_lun, 1)) { pr_warn("refusing login while another exclusive login present\n"); req->status.status = cpu_to_be32( @@ -384,7 +382,7 @@ static void sbp_management_request_login( * check we haven't exceeded the number of allowed logins * reject with resources_unavailable if we have */ - if (sbp_login_count_all_by_lun(tpg, se_lun, 0) >= + if (sbp_login_count_all_by_lun(tpg, unpacked_lun, 0) >= tport->max_logins_per_lun) { pr_warn("max number of logins reached\n"); @@ -440,7 +438,7 @@ static void sbp_management_request_login( } login->sess = sess; - login->lun = se_lun; + login->login_lun = unpacked_lun; login->status_fifo_addr = sbp2_pointer_to_addr(&req->orb.status_fifo); login->exclusive = LOGIN_ORB_EXCLUSIVE(be32_to_cpu(req->orb.misc)); login->login_id = atomic_inc_return(&login_id); @@ -602,7 +600,7 @@ static void sbp_management_request_logout( } pr_info("mgt_agent LOGOUT from LUN %d session %d\n", - login->lun->unpacked_lun, login->login_id); + login->login_lun, login->login_id); if (req->node_addr != login->sess->node_id) { pr_warn("logout from different node ID\n"); @@ -1228,12 +1226,14 @@ static void sbp_handle_command(struct sbp_target_request *req) goto err; } - unpacked_lun = req->login->lun->unpacked_lun; + unpacked_lun = req->login->login_lun; sbp_calc_data_length_direction(req, &data_length, &data_dir); pr_debug("sbp_handle_command ORB:0x%llx unpacked_lun:%d data_len:%d data_dir:%d\n", req->orb_pointer, unpacked_lun, data_length, data_dir); + /* only used for printk until we do TMRs */ + req->se_cmd.tag = req->orb_pointer; if (target_submit_cmd(&req->se_cmd, sess->se_sess, req->cmd_buf, req->sense_buf, unpacked_lun, data_length, TCM_SIMPLE_TAG, data_dir, 0)) @@ -1707,33 +1707,6 @@ static u16 sbp_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 sbp_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static struct se_node_acl *sbp_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct sbp_nacl *nacl; - - nacl = kzalloc(sizeof(struct sbp_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct sbp_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; -} - -static void sbp_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct sbp_nacl *nacl = - container_of(se_nacl, struct sbp_nacl, se_node_acl); - kfree(nacl); -} - static u32 sbp_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1795,15 +1768,6 @@ static void sbp_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 sbp_get_task_tag(struct se_cmd *se_cmd) -{ - struct sbp_target_request *req = container_of(se_cmd, - struct sbp_target_request, se_cmd); - - /* only used for printk until we do TMRs */ - return (u32)req->orb_pointer; -} - static int sbp_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1859,106 +1823,23 @@ static int sbp_check_stop_free(struct se_cmd *se_cmd) return 1; } -/* - * Handlers for Serial Bus Protocol 2/3 (SBP-2 / SBP-3) - */ -static u8 sbp_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - /* - * Return a IEEE 1394 SCSI Protocol identifier for loopback operations - * This is defined in section 7.5.1 Table 362 in spc4r17 - */ - return SCSI_PROTOCOL_SBP; -} - -static u32 sbp_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - int ret; - - /* - * Set PROTOCOL IDENTIFIER to 3h for SBP - */ - buf[0] = SCSI_PROTOCOL_SBP; - /* - * From spc4r17, 7.5.4.4 TransportID for initiator ports using SCSI - * over IEEE 1394 - */ - ret = hex2bin(&buf[8], se_nacl->initiatorname, 8); - if (ret < 0) - pr_debug("sbp transport_id: invalid hex string\n"); - - /* - * The IEEE 1394 Transport ID is a hardcoded 24-byte length - */ - return 24; -} - -static u32 sbp_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * From spc4r17, 7.5.4.4 TransportID for initiator ports using SCSI - * over IEEE 1394 - * - * The SBP Transport ID is a hardcoded 24-byte length - */ - return 24; -} - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -static char *sbp_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - /* - * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.4 TransportID - * for initiator ports using SCSI over SBP Serial SCSI Protocol - * - * The TransportID for a IEEE 1394 Initiator Port is of fixed size of - * 24 bytes, and IEEE 1394 does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; - - return (char *)&buf[8]; -} - static int sbp_count_se_tpg_luns(struct se_portal_group *tpg) { - int i, count = 0; - - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - struct se_lun *se_lun = tpg->tpg_lun_list[i]; - - if (se_lun->lun_status == TRANSPORT_LUN_STATUS_FREE) - continue; + struct se_lun *lun; + int count = 0; + rcu_read_lock(); + hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) count++; - } - spin_unlock(&tpg->tpg_lun_lock); + rcu_read_unlock(); return count; } static int sbp_update_unit_directory(struct sbp_tport *tport) { - int num_luns, num_entries, idx = 0, mgt_agt_addr, ret, i; + struct se_lun *lun; + int num_luns, num_entries, idx = 0, mgt_agt_addr, ret; u32 *data; if (tport->unit_directory.data) { @@ -2020,28 +1901,23 @@ static int sbp_update_unit_directory(struct sbp_tport *tport) /* unit unique ID (leaf is just after LUNs) */ data[idx++] = 0x8d000000 | (num_luns + 1); - spin_lock(&tport->tpg->se_tpg.tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - struct se_lun *se_lun = tport->tpg->se_tpg.tpg_lun_list[i]; + rcu_read_lock(); + hlist_for_each_entry_rcu(lun, &tport->tpg->se_tpg.tpg_lun_hlist, link) { struct se_device *dev; int type; - - if (se_lun->lun_status == TRANSPORT_LUN_STATUS_FREE) - continue; - - spin_unlock(&tport->tpg->se_tpg.tpg_lun_lock); - - dev = se_lun->lun_se_dev; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + dev = rcu_dereference_raw(lun->lun_se_dev); type = dev->transport->get_device_type(dev); /* logical_unit_number */ data[idx++] = 0x14000000 | ((type << 16) & 0x1f0000) | - (se_lun->unpacked_lun & 0xffff); - - spin_lock(&tport->tpg->se_tpg.tpg_lun_lock); + (lun->unpacked_lun & 0xffff); } - spin_unlock(&tport->tpg->se_tpg.tpg_lun_lock); + rcu_read_unlock(); /* unit unique ID leaf */ data[idx++] = 2 << 16; @@ -2100,48 +1976,13 @@ static ssize_t sbp_format_wwn(char *buf, size_t len, u64 wwn) return snprintf(buf, len, "%016llx", wwn); } -static struct se_node_acl *sbp_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int sbp_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct se_node_acl *se_nacl, *se_nacl_new; - struct sbp_nacl *nacl; u64 guid = 0; - u32 nexus_depth = 1; if (sbp_parse_wwn(name, &guid) < 0) - return ERR_PTR(-EINVAL); - - se_nacl_new = sbp_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - sbp_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - - nacl = container_of(se_nacl, struct sbp_nacl, se_node_acl); - nacl->guid = guid; - sbp_format_wwn(nacl->iport_name, SBP_NAMELEN, guid); - - return se_nacl; -} - -static void sbp_drop_nodeacl(struct se_node_acl *se_acl) -{ - struct sbp_nacl *nacl = - container_of(se_acl, struct sbp_nacl, se_node_acl); - - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); + return -EINVAL; + return 0; } static int sbp_post_link_lun( @@ -2214,8 +2055,7 @@ static struct se_portal_group *sbp_make_tpg( goto out_free_tpg; } - ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, tpg, - TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SBP); if (ret < 0) goto out_unreg_mgt_agt; @@ -2505,19 +2345,12 @@ static const struct target_core_fabric_ops sbp_ops = { .module = THIS_MODULE, .name = "sbp", .get_fabric_name = sbp_get_fabric_name, - .get_fabric_proto_ident = sbp_get_fabric_proto_ident, .tpg_get_wwn = sbp_get_fabric_wwn, .tpg_get_tag = sbp_get_tag, - .tpg_get_default_depth = sbp_get_default_depth, - .tpg_get_pr_transport_id = sbp_get_pr_transport_id, - .tpg_get_pr_transport_id_len = sbp_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = sbp_parse_pr_out_transport_id, .tpg_check_demo_mode = sbp_check_true, .tpg_check_demo_mode_cache = sbp_check_true, .tpg_check_demo_mode_write_protect = sbp_check_false, .tpg_check_prod_mode_write_protect = sbp_check_false, - .tpg_alloc_fabric_acl = sbp_alloc_fabric_acl, - .tpg_release_fabric_acl = sbp_release_fabric_acl, .tpg_get_inst_index = sbp_tpg_get_inst_index, .release_cmd = sbp_release_cmd, .shutdown_session = sbp_shutdown_session, @@ -2526,7 +2359,6 @@ static const struct target_core_fabric_ops sbp_ops = { .write_pending = sbp_write_pending, .write_pending_status = sbp_write_pending_status, .set_default_node_attributes = sbp_set_default_node_attrs, - .get_task_tag = sbp_get_task_tag, .get_cmd_state = sbp_get_cmd_state, .queue_data_in = sbp_queue_data_in, .queue_status = sbp_queue_status, @@ -2542,8 +2374,7 @@ static const struct target_core_fabric_ops sbp_ops = { .fabric_pre_unlink = sbp_pre_unlink_lun, .fabric_make_np = NULL, .fabric_drop_np = NULL, - .fabric_make_nodeacl = sbp_make_nodeacl, - .fabric_drop_nodeacl = sbp_drop_nodeacl, + .fabric_init_nodeacl = sbp_init_nodeacl, .tfc_wwn_attrs = sbp_wwn_attrs, .tfc_tpg_base_attrs = sbp_tpg_base_attrs, diff --git a/drivers/target/sbp/sbp_target.h b/drivers/target/sbp/sbp_target.h index 6d0d74a2c545..73bcb1208832 100644 --- a/drivers/target/sbp/sbp_target.h +++ b/drivers/target/sbp/sbp_target.h @@ -125,7 +125,7 @@ struct sbp_login_descriptor { struct sbp_session *sess; struct list_head link; - struct se_lun *lun; + u32 login_lun; u64 status_fifo_addr; int exclusive; @@ -151,15 +151,6 @@ struct sbp_session { u64 reconnect_expires; }; -struct sbp_nacl { - /* Initiator EUI-64 */ - u64 guid; - /* ASCII formatted GUID for SBP Initiator port */ - char iport_name[SBP_NAMELEN]; - /* Returned by sbp_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct sbp_tpg { /* Target portal group tag for TCM */ u16 tport_tpgt; diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 8ca373774276..49aba4a31747 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -34,7 +34,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_alua.h" @@ -43,11 +42,13 @@ static sense_reason_t core_alua_check_transition(int state, int valid, int *primary); static int core_alua_set_tg_pt_secondary_state( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, int explicit, int offline); + struct se_lun *lun, int explicit, int offline); static char *core_alua_dump_state(int state); +static void __target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp); + static u16 alua_lu_gps_counter; static u32 alua_lu_gps_count; @@ -145,9 +146,8 @@ sense_reason_t target_emulate_report_target_port_groups(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_port *port; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; + struct se_lun *lun; unsigned char *buf; u32 rd_len = 0, off; int ext_hdr = (cmd->t_task_cdb[1] & 0x20); @@ -222,9 +222,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) rd_len += 8; spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(tg_pt_gp_mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = tg_pt_gp_mem->tg_pt; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { /* * Start Target Port descriptor format * @@ -234,8 +233,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set RELATIVE TARGET PORT IDENTIFIER */ - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((lun->lun_rtpi >> 8) & 0xff); + buf[off++] = (lun->lun_rtpi & 0xff); rd_len += 4; } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); @@ -259,15 +258,11 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) * this CDB was received upon to determine this value individually * for ALUA target port group. */ - port = cmd->se_lun->lun_sep; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (tg_pt_gp_mem) { - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; - if (tg_pt_gp) - buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - } + spin_lock(&cmd->se_lun->lun_tg_pt_gp_lock); + tg_pt_gp = cmd->se_lun->lun_tg_pt_gp; + if (tg_pt_gp) + buf[5] = tg_pt_gp->tg_pt_gp_implicit_trans_secs; + spin_unlock(&cmd->se_lun->lun_tg_pt_gp_lock); } transport_kunmap_data_sg(cmd); @@ -284,10 +279,9 @@ sense_reason_t target_emulate_set_target_port_groups(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_port *port, *l_port = cmd->se_lun->lun_sep; + struct se_lun *l_lun = cmd->se_lun; struct se_node_acl *nacl = cmd->se_sess->se_node_acl; struct t10_alua_tg_pt_gp *tg_pt_gp = NULL, *l_tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, *l_tg_pt_gp_mem; unsigned char *buf; unsigned char *ptr; sense_reason_t rc = TCM_NO_SENSE; @@ -295,9 +289,6 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) int alua_access_state, primary = 0, valid_states; u16 tg_pt_id, rtpi; - if (!l_port) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (cmd->data_length < 4) { pr_warn("SET TARGET PORT GROUPS parameter list length %u too" " small\n", cmd->data_length); @@ -312,29 +303,24 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * Determine if explicit ALUA via SET_TARGET_PORT_GROUPS is allowed * for the local tg_pt_gp. */ - l_tg_pt_gp_mem = l_port->sep_alua_tg_pt_gp_mem; - if (!l_tg_pt_gp_mem) { - pr_err("Unable to access l_port->sep_alua_tg_pt_gp_mem\n"); - rc = TCM_UNSUPPORTED_SCSI_OPCODE; - goto out; - } - spin_lock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - l_tg_pt_gp = l_tg_pt_gp_mem->tg_pt_gp; + spin_lock(&l_lun->lun_tg_pt_gp_lock); + l_tg_pt_gp = l_lun->lun_tg_pt_gp; if (!l_tg_pt_gp) { - spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); - pr_err("Unable to access *l_tg_pt_gp_mem->tg_pt_gp\n"); + spin_unlock(&l_lun->lun_tg_pt_gp_lock); + pr_err("Unable to access l_lun->tg_pt_gp\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } - spin_unlock(&l_tg_pt_gp_mem->tg_pt_gp_mem_lock); if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) { + spin_unlock(&l_lun->lun_tg_pt_gp_lock); pr_debug("Unable to process SET_TARGET_PORT_GROUPS" " while TPGS_EXPLICIT_ALUA is disabled\n"); rc = TCM_UNSUPPORTED_SCSI_OPCODE; goto out; } valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; + spin_unlock(&l_lun->lun_tg_pt_gp_lock); ptr = &buf[4]; /* Skip over RESERVED area in header */ @@ -396,7 +382,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) spin_unlock(&dev->t10_alua.tg_pt_gps_lock); if (!core_alua_do_port_transition(tg_pt_gp, - dev, l_port, nacl, + dev, l_lun, nacl, alua_access_state, 1)) found = true; @@ -406,6 +392,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) } spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } else { + struct se_lun *lun; + /* * Extract the RELATIVE TARGET PORT IDENTIFIER to identify * the Target Port in question for the the incoming @@ -417,17 +405,16 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * for the struct se_device storage object. */ spin_lock(&dev->se_port_lock); - list_for_each_entry(port, &dev->dev_sep_list, - sep_list) { - if (port->sep_rtpi != rtpi) + list_for_each_entry(lun, &dev->dev_sep_list, + lun_dev_link) { + if (lun->lun_rtpi != rtpi) continue; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - + // XXX: racy unlock spin_unlock(&dev->se_port_lock); if (!core_alua_set_tg_pt_secondary_state( - tg_pt_gp_mem, port, 1, 1)) + lun, 1, 1)) found = true; spin_lock(&dev->se_port_lock); @@ -696,9 +683,7 @@ target_alua_state_check(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; unsigned char *cdb = cmd->t_task_cdb; struct se_lun *lun = cmd->se_lun; - struct se_port *port = lun->lun_sep; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; int out_alua_state, nonop_delay_msecs; if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) @@ -706,33 +691,27 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; - if (!port) - return 0; /* * First, check for a struct se_port specific secondary ALUA target port * access state: OFFLINE */ - if (atomic_read(&port->sep_tg_pt_secondary_offline)) { + if (atomic_read(&lun->lun_tg_pt_secondary_offline)) { pr_debug("ALUA: Got secondary offline status for local" " target port\n"); set_ascq(cmd, ASCQ_04H_ALUA_OFFLINE); return TCM_CHECK_CONDITION_NOT_READY; } - /* - * Second, obtain the struct t10_alua_tg_pt_gp_member pointer to the - * ALUA target port group, to obtain current ALUA access state. - * Otherwise look for the underlying struct se_device association with - * a ALUA logical unit group. - */ - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) + + if (!lun->lun_tg_pt_gp) return 0; - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + + // XXX: keeps using tg_pt_gp witout reference after unlock + spin_unlock(&lun->lun_tg_pt_gp_lock); /* * Process ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED in a separate conditional * statement so the compiler knows explicitly to check this case first. @@ -764,7 +743,7 @@ target_alua_state_check(struct se_cmd *cmd) break; /* * OFFLINE is a secondary ALUA target port group access state, that is - * handled above with struct se_port->sep_tg_pt_secondary_offline=1 + * handled above with struct se_lun->lun_tg_pt_secondary_offline=1 */ case ALUA_ACCESS_STATE_OFFLINE: default: @@ -906,10 +885,6 @@ int core_alua_check_nonop_delay( } EXPORT_SYMBOL(core_alua_check_nonop_delay); -/* - * Called with tg_pt_gp->tg_pt_gp_md_mutex or tg_pt_gp_mem->sep_tg_pt_md_mutex - * - */ static int core_alua_write_tpg_metadata( const char *path, unsigned char *md_buf, @@ -965,22 +940,15 @@ static int core_alua_update_tpg_primary_metadata( return rc; } -static void core_alua_do_transition_tg_pt_work(struct work_struct *work) +static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) { - struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; struct se_dev_entry *se_deve; + struct se_lun *lun; struct se_lun_acl *lacl; - struct se_port *port; - struct t10_alua_tg_pt_gp_member *mem; - bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == - ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = mem->tg_pt; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { /* * After an implicit target port asymmetric access state * change, a device server shall establish a unit attention @@ -995,38 +963,58 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) * every I_T nexus other than the I_T nexus on which the SET * TARGET PORT GROUPS command */ - atomic_inc_mb(&mem->tg_pt_gp_mem_ref_cnt); + if (!percpu_ref_tryget_live(&lun->lun_ref)) + continue; spin_unlock(&tg_pt_gp->tg_pt_gp_lock); - spin_lock_bh(&port->sep_alua_lock); - list_for_each_entry(se_deve, &port->sep_alua_list, - alua_port_list) { - lacl = se_deve->se_lun_acl; + spin_lock(&lun->lun_deve_lock); + list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) { + lacl = rcu_dereference_check(se_deve->se_lun_acl, + lockdep_is_held(&lun->lun_deve_lock)); + /* - * se_deve->se_lun_acl pointer may be NULL for a - * entry created without explicit Node+MappedLUN ACLs + * spc4r37 p.242: + * After an explicit target port asymmetric access + * state change, a device server shall establish a + * unit attention condition with the additional sense + * code set to ASYMMETRIC ACCESS STATE CHANGED for + * the initiator port associated with every I_T nexus + * other than the I_T nexus on which the SET TARGET + * PORT GROUPS command was received. */ - if (!lacl) - continue; - if ((tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG) && - (tg_pt_gp->tg_pt_gp_alua_nacl != NULL) && - (tg_pt_gp->tg_pt_gp_alua_nacl == lacl->se_lun_nacl) && - (tg_pt_gp->tg_pt_gp_alua_port != NULL) && - (tg_pt_gp->tg_pt_gp_alua_port == port)) + (tg_pt_gp->tg_pt_gp_alua_lun != NULL) && + (tg_pt_gp->tg_pt_gp_alua_lun == lun)) continue; - core_scsi3_ua_allocate(lacl->se_lun_nacl, - se_deve->mapped_lun, 0x2A, + /* + * se_deve->se_lun_acl pointer may be NULL for a + * entry created without explicit Node+MappedLUN ACLs + */ + if (lacl && (tg_pt_gp->tg_pt_gp_alua_nacl != NULL) && + (tg_pt_gp->tg_pt_gp_alua_nacl == lacl->se_lun_nacl)) + continue; + + core_scsi3_ua_allocate(se_deve, 0x2A, ASCQ_2AH_ASYMMETRIC_ACCESS_STATE_CHANGED); } - spin_unlock_bh(&port->sep_alua_lock); + spin_unlock(&lun->lun_deve_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - atomic_dec_mb(&mem->tg_pt_gp_mem_ref_cnt); + percpu_ref_put(&lun->lun_ref); } spin_unlock(&tg_pt_gp->tg_pt_gp_lock); +} + +static void core_alua_do_transition_tg_pt_work(struct work_struct *work) +{ + struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; + bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == + ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); + /* * Update the ALUA metadata buf that has been allocated in * core_alua_do_port_transition(), this metadata will be written @@ -1056,6 +1044,9 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work) tg_pt_gp->tg_pt_gp_id, core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state), core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state)); + + core_alua_queue_state_change_ua(tg_pt_gp); + spin_lock(&dev->t10_alua.tg_pt_gps_lock); atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); @@ -1108,6 +1099,8 @@ static int core_alua_do_transition_tg_pt( ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; + core_alua_queue_state_change_ua(tg_pt_gp); + /* * Check for the optional ALUA primary state transition delay */ @@ -1142,7 +1135,7 @@ static int core_alua_do_transition_tg_pt( int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *l_tg_pt_gp, struct se_device *l_dev, - struct se_port *l_port, + struct se_lun *l_lun, struct se_node_acl *l_nacl, int new_state, int explicit) @@ -1172,7 +1165,7 @@ int core_alua_do_port_transition( * core_alua_do_transition_tg_pt() will always return * success. */ - l_tg_pt_gp->tg_pt_gp_alua_port = l_port; + l_tg_pt_gp->tg_pt_gp_alua_lun = l_lun; l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl; rc = core_alua_do_transition_tg_pt(l_tg_pt_gp, new_state, explicit); @@ -1211,10 +1204,10 @@ int core_alua_do_port_transition( continue; if (l_tg_pt_gp == tg_pt_gp) { - tg_pt_gp->tg_pt_gp_alua_port = l_port; + tg_pt_gp->tg_pt_gp_alua_lun = l_lun; tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl; } else { - tg_pt_gp->tg_pt_gp_alua_port = NULL; + tg_pt_gp->tg_pt_gp_alua_lun = NULL; tg_pt_gp->tg_pt_gp_alua_nacl = NULL; } atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt); @@ -1251,22 +1244,20 @@ int core_alua_do_port_transition( return rc; } -/* - * Called with tg_pt_gp_mem->sep_tg_pt_md_mutex held - */ -static int core_alua_update_tpg_secondary_metadata( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port) +static int core_alua_update_tpg_secondary_metadata(struct se_lun *lun) { + struct se_portal_group *se_tpg = lun->lun_tpg; unsigned char *md_buf; - struct se_portal_group *se_tpg = port->sep_tpg; char path[ALUA_METADATA_PATH_LEN], wwn[ALUA_SECONDARY_METADATA_WWN_LEN]; int len, rc; + mutex_lock(&lun->lun_tg_pt_md_mutex); + md_buf = kzalloc(ALUA_MD_BUF_LEN, GFP_KERNEL); if (!md_buf) { pr_err("Unable to allocate buf for ALUA metadata\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out_unlock; } memset(path, 0, ALUA_METADATA_PATH_LEN); @@ -1281,32 +1272,33 @@ static int core_alua_update_tpg_secondary_metadata( len = snprintf(md_buf, ALUA_MD_BUF_LEN, "alua_tg_pt_offline=%d\n" "alua_tg_pt_status=0x%02x\n", - atomic_read(&port->sep_tg_pt_secondary_offline), - port->sep_tg_pt_secondary_stat); + atomic_read(&lun->lun_tg_pt_secondary_offline), + lun->lun_tg_pt_secondary_stat); - snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%u", + snprintf(path, ALUA_METADATA_PATH_LEN, "/var/target/alua/%s/%s/lun_%llu", se_tpg->se_tpg_tfo->get_fabric_name(), wwn, - port->sep_lun->unpacked_lun); + lun->unpacked_lun); rc = core_alua_write_tpg_metadata(path, md_buf, len); kfree(md_buf); +out_unlock: + mutex_unlock(&lun->lun_tg_pt_md_mutex); return rc; } static int core_alua_set_tg_pt_secondary_state( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct se_port *port, + struct se_lun *lun, int explicit, int offline) { struct t10_alua_tg_pt_gp *tg_pt_gp; int trans_delay_msecs; - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (!tg_pt_gp) { - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); pr_err("Unable to complete secondary state" " transition\n"); return -EINVAL; @@ -1314,14 +1306,14 @@ static int core_alua_set_tg_pt_secondary_state( trans_delay_msecs = tg_pt_gp->tg_pt_gp_trans_delay_msecs; /* * Set the secondary ALUA target port access state to OFFLINE - * or release the previously secondary state for struct se_port + * or release the previously secondary state for struct se_lun */ if (offline) - atomic_set(&port->sep_tg_pt_secondary_offline, 1); + atomic_set(&lun->lun_tg_pt_secondary_offline, 1); else - atomic_set(&port->sep_tg_pt_secondary_offline, 0); + atomic_set(&lun->lun_tg_pt_secondary_offline, 0); - port->sep_tg_pt_secondary_stat = (explicit) ? + lun->lun_tg_pt_secondary_stat = (explicit) ? ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; @@ -1330,7 +1322,7 @@ static int core_alua_set_tg_pt_secondary_state( "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id, (offline) ? "OFFLINE" : "ONLINE"); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); /* * Do the optional transition delay after we set the secondary * ALUA access state. @@ -1341,11 +1333,8 @@ static int core_alua_set_tg_pt_secondary_state( * See if we need to update the ALUA fabric port metadata for * secondary state and status */ - if (port->sep_tg_pt_secondary_write_md) { - mutex_lock(&port->sep_tg_pt_md_mutex); - core_alua_update_tpg_secondary_metadata(tg_pt_gp_mem, port); - mutex_unlock(&port->sep_tg_pt_md_mutex); - } + if (lun->lun_tg_pt_secondary_write_md) + core_alua_update_tpg_secondary_metadata(lun); return 0; } @@ -1699,7 +1688,7 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, return NULL; } INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list); - INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_mem_list); + INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list); mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); @@ -1793,32 +1782,11 @@ again: return 0; } -struct t10_alua_tg_pt_gp_member *core_alua_allocate_tg_pt_gp_mem( - struct se_port *port) -{ - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; - - tg_pt_gp_mem = kmem_cache_zalloc(t10_alua_tg_pt_gp_mem_cache, - GFP_KERNEL); - if (!tg_pt_gp_mem) { - pr_err("Unable to allocate struct t10_alua_tg_pt_gp_member\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&tg_pt_gp_mem->tg_pt_gp_mem_list); - spin_lock_init(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - atomic_set(&tg_pt_gp_mem->tg_pt_gp_mem_ref_cnt, 0); - - tg_pt_gp_mem->tg_pt = port; - port->sep_alua_tg_pt_gp_mem = tg_pt_gp_mem; - - return tg_pt_gp_mem; -} - void core_alua_free_tg_pt_gp( struct t10_alua_tg_pt_gp *tg_pt_gp) { struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, *tg_pt_gp_mem_tmp; + struct se_lun *lun, *next; /* * Once we have reached this point, config_item_put() has already @@ -1849,30 +1817,24 @@ void core_alua_free_tg_pt_gp( * struct se_port. */ spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry_safe(tg_pt_gp_mem, tg_pt_gp_mem_tmp, - &tg_pt_gp->tg_pt_gp_mem_list, tg_pt_gp_mem_list) { - if (tg_pt_gp_mem->tg_pt_gp_assoc) { - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp->tg_pt_gp_members--; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; - } + list_for_each_entry_safe(lun, next, + &tg_pt_gp->tg_pt_gp_lun_list, lun_tg_pt_gp_link) { + list_del_init(&lun->lun_tg_pt_gp_link); + tg_pt_gp->tg_pt_gp_members--; + spin_unlock(&tg_pt_gp->tg_pt_gp_lock); /* - * tg_pt_gp_mem is associated with a single - * se_port->sep_alua_tg_pt_gp_mem, and is released via - * core_alua_free_tg_pt_gp_mem(). - * * If the passed tg_pt_gp does NOT match the default_tg_pt_gp, * assume we want to re-associate a given tg_pt_gp_mem with * default_tg_pt_gp. */ - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_lock(&lun->lun_tg_pt_gp_lock); if (tg_pt_gp != dev->t10_alua.default_tg_pt_gp) { - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, + __target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); } else - tg_pt_gp_mem->tg_pt_gp = NULL; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + lun->lun_tg_pt_gp = NULL; + spin_unlock(&lun->lun_tg_pt_gp_lock); spin_lock(&tg_pt_gp->tg_pt_gp_lock); } @@ -1881,35 +1843,6 @@ void core_alua_free_tg_pt_gp( kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp); } -void core_alua_free_tg_pt_gp_mem(struct se_port *port) -{ - struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; - - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return; - - while (atomic_read(&tg_pt_gp_mem->tg_pt_gp_mem_ref_cnt)) - cpu_relax(); - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; - if (tg_pt_gp) { - spin_lock(&tg_pt_gp->tg_pt_gp_lock); - if (tg_pt_gp_mem->tg_pt_gp_assoc) { - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp->tg_pt_gp_members--; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; - } - spin_unlock(&tg_pt_gp->tg_pt_gp_lock); - tg_pt_gp_mem->tg_pt_gp = NULL; - } - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - - kmem_cache_free(t10_alua_tg_pt_gp_mem_cache, tg_pt_gp_mem); -} - static struct t10_alua_tg_pt_gp *core_alua_get_tg_pt_gp_by_name( struct se_device *dev, const char *name) { @@ -1943,50 +1876,65 @@ static void core_alua_put_tg_pt_gp_from_name( spin_unlock(&dev->t10_alua.tg_pt_gps_lock); } -/* - * Called with struct t10_alua_tg_pt_gp_member->tg_pt_gp_mem_lock held - */ -void __core_alua_attach_tg_pt_gp_mem( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct t10_alua_tg_pt_gp *tg_pt_gp) +static void __target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) { + struct se_dev_entry *se_deve; + + assert_spin_locked(&lun->lun_tg_pt_gp_lock); + spin_lock(&tg_pt_gp->tg_pt_gp_lock); - tg_pt_gp_mem->tg_pt_gp = tg_pt_gp; - tg_pt_gp_mem->tg_pt_gp_assoc = 1; - list_add_tail(&tg_pt_gp_mem->tg_pt_gp_mem_list, - &tg_pt_gp->tg_pt_gp_mem_list); + lun->lun_tg_pt_gp = tg_pt_gp; + list_add_tail(&lun->lun_tg_pt_gp_link, &tg_pt_gp->tg_pt_gp_lun_list); tg_pt_gp->tg_pt_gp_members++; + spin_lock(&lun->lun_deve_lock); + list_for_each_entry(se_deve, &lun->lun_deve_list, lun_link) + core_scsi3_ua_allocate(se_deve, 0x3f, + ASCQ_3FH_INQUIRY_DATA_HAS_CHANGED); + spin_unlock(&lun->lun_deve_lock); spin_unlock(&tg_pt_gp->tg_pt_gp_lock); } -/* - * Called with struct t10_alua_tg_pt_gp_member->tg_pt_gp_mem_lock held - */ -static void __core_alua_drop_tg_pt_gp_mem( - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem, - struct t10_alua_tg_pt_gp *tg_pt_gp) +void target_attach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) { + spin_lock(&lun->lun_tg_pt_gp_lock); + __target_attach_tg_pt_gp(lun, tg_pt_gp); + spin_unlock(&lun->lun_tg_pt_gp_lock); +} + +static void __target_detach_tg_pt_gp(struct se_lun *lun, + struct t10_alua_tg_pt_gp *tg_pt_gp) +{ + assert_spin_locked(&lun->lun_tg_pt_gp_lock); + spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_del(&tg_pt_gp_mem->tg_pt_gp_mem_list); - tg_pt_gp_mem->tg_pt_gp = NULL; - tg_pt_gp_mem->tg_pt_gp_assoc = 0; + list_del_init(&lun->lun_tg_pt_gp_link); tg_pt_gp->tg_pt_gp_members--; spin_unlock(&tg_pt_gp->tg_pt_gp_lock); + + lun->lun_tg_pt_gp = NULL; } -ssize_t core_alua_show_tg_pt_gp_info(struct se_port *port, char *page) +void target_detach_tg_pt_gp(struct se_lun *lun) +{ + struct t10_alua_tg_pt_gp *tg_pt_gp; + + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; + if (tg_pt_gp) + __target_detach_tg_pt_gp(lun, tg_pt_gp); + spin_unlock(&lun->lun_tg_pt_gp_lock); +} + +ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page) { struct config_item *tg_pt_ci; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; ssize_t len = 0; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return len; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) { tg_pt_ci = &tg_pt_gp->tg_pt_gp_group.cg_item; len += sprintf(page, "TG Port Alias: %s\nTG Port Group ID:" @@ -1998,34 +1946,33 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_port *port, char *page) &tg_pt_gp->tg_pt_gp_alua_access_state)), core_alua_dump_status( tg_pt_gp->tg_pt_gp_alua_access_status), - (atomic_read(&port->sep_tg_pt_secondary_offline)) ? + atomic_read(&lun->lun_tg_pt_secondary_offline) ? "Offline" : "None", - core_alua_dump_status(port->sep_tg_pt_secondary_stat)); + core_alua_dump_status(lun->lun_tg_pt_secondary_stat)); } - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); return len; } ssize_t core_alua_store_tg_pt_gp_info( - struct se_port *port, + struct se_lun *lun, const char *page, size_t count) { - struct se_portal_group *tpg; - struct se_lun *lun; - struct se_device *dev = port->sep_lun->lun_se_dev; + struct se_portal_group *tpg = lun->lun_tpg; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); struct t10_alua_tg_pt_gp *tg_pt_gp = NULL, *tg_pt_gp_new = NULL; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - tpg = port->sep_tpg; - lun = port->sep_lun; - - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return 0; + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + return -ENODEV; if (count > TG_PT_GROUP_NAME_BUF) { pr_err("ALUA Target Port Group alias too large!\n"); @@ -2049,8 +1996,8 @@ ssize_t core_alua_store_tg_pt_gp_info( return -ENODEV; } - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) { /* * Clearing an existing tg_pt_gp association, and replacing @@ -2068,24 +2015,19 @@ ssize_t core_alua_store_tg_pt_gp_info( &tg_pt_gp->tg_pt_gp_group.cg_item), tg_pt_gp->tg_pt_gp_id); - __core_alua_drop_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp); - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, + __target_detach_tg_pt_gp(lun, tg_pt_gp); + __target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); return count; } - /* - * Removing existing association of tg_pt_gp_mem with tg_pt_gp - */ - __core_alua_drop_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp); + __target_detach_tg_pt_gp(lun, tg_pt_gp); move = 1; } - /* - * Associate tg_pt_gp_mem with tg_pt_gp_new. - */ - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, tg_pt_gp_new); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + + __target_attach_tg_pt_gp(lun, tg_pt_gp_new); + spin_unlock(&lun->lun_tg_pt_gp_lock); pr_debug("Target_Core_ConfigFS: %s %s/tpgt_%hu/%s to ALUA" " Target Port Group: alua/%s, ID: %hu\n", (move) ? "Moving" : "Adding", tpg->se_tpg_tfo->tpg_get_wwn(tpg), @@ -2268,11 +2210,8 @@ ssize_t core_alua_store_preferred_bit( ssize_t core_alua_show_offline_bit(struct se_lun *lun, char *page) { - if (!lun->lun_sep) - return -ENODEV; - return sprintf(page, "%d\n", - atomic_read(&lun->lun_sep->sep_tg_pt_secondary_offline)); + atomic_read(&lun->lun_tg_pt_secondary_offline)); } ssize_t core_alua_store_offline_bit( @@ -2280,11 +2219,16 @@ ssize_t core_alua_store_offline_bit( const char *page, size_t count) { - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); unsigned long tmp; int ret; - if (!lun->lun_sep) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; ret = kstrtoul(page, 0, &tmp); @@ -2297,14 +2241,8 @@ ssize_t core_alua_store_offline_bit( tmp); return -EINVAL; } - tg_pt_gp_mem = lun->lun_sep->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) { - pr_err("Unable to locate *tg_pt_gp_mem\n"); - return -EINVAL; - } - ret = core_alua_set_tg_pt_secondary_state(tg_pt_gp_mem, - lun->lun_sep, 0, (int)tmp); + ret = core_alua_set_tg_pt_secondary_state(lun, 0, (int)tmp); if (ret < 0) return -EINVAL; @@ -2315,7 +2253,7 @@ ssize_t core_alua_show_secondary_status( struct se_lun *lun, char *page) { - return sprintf(page, "%d\n", lun->lun_sep->sep_tg_pt_secondary_stat); + return sprintf(page, "%d\n", lun->lun_tg_pt_secondary_stat); } ssize_t core_alua_store_secondary_status( @@ -2338,7 +2276,7 @@ ssize_t core_alua_store_secondary_status( tmp); return -EINVAL; } - lun->lun_sep->sep_tg_pt_secondary_stat = (int)tmp; + lun->lun_tg_pt_secondary_stat = (int)tmp; return count; } @@ -2347,8 +2285,7 @@ ssize_t core_alua_show_secondary_write_metadata( struct se_lun *lun, char *page) { - return sprintf(page, "%d\n", - lun->lun_sep->sep_tg_pt_secondary_write_md); + return sprintf(page, "%d\n", lun->lun_tg_pt_secondary_write_md); } ssize_t core_alua_store_secondary_write_metadata( @@ -2369,7 +2306,7 @@ ssize_t core_alua_store_secondary_write_metadata( " %lu\n", tmp); return -EINVAL; } - lun->lun_sep->sep_tg_pt_secondary_write_md = (int)tmp; + lun->lun_tg_pt_secondary_write_md = (int)tmp; return count; } diff --git a/drivers/target/target_core_alua.h b/drivers/target/target_core_alua.h index 0a7d65e80404..9b250f9b33bf 100644 --- a/drivers/target/target_core_alua.h +++ b/drivers/target/target_core_alua.h @@ -85,7 +85,6 @@ extern struct kmem_cache *t10_alua_lu_gp_cache; extern struct kmem_cache *t10_alua_lu_gp_mem_cache; extern struct kmem_cache *t10_alua_tg_pt_gp_cache; -extern struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; extern struct kmem_cache *t10_alua_lba_map_cache; extern struct kmem_cache *t10_alua_lba_map_mem_cache; @@ -94,7 +93,7 @@ extern sense_reason_t target_emulate_set_target_port_groups(struct se_cmd *); extern sense_reason_t target_emulate_report_referrals(struct se_cmd *); extern int core_alua_check_nonop_delay(struct se_cmd *); extern int core_alua_do_port_transition(struct t10_alua_tg_pt_gp *, - struct se_device *, struct se_port *, + struct se_device *, struct se_lun *, struct se_node_acl *, int, int); extern char *core_alua_dump_status(int); extern struct t10_alua_lba_map *core_alua_allocate_lba_map( @@ -117,14 +116,11 @@ extern void core_alua_drop_lu_gp_dev(struct se_device *); extern struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp( struct se_device *, const char *, int); extern int core_alua_set_tg_pt_gp_id(struct t10_alua_tg_pt_gp *, u16); -extern struct t10_alua_tg_pt_gp_member *core_alua_allocate_tg_pt_gp_mem( - struct se_port *); extern void core_alua_free_tg_pt_gp(struct t10_alua_tg_pt_gp *); -extern void core_alua_free_tg_pt_gp_mem(struct se_port *); -extern void __core_alua_attach_tg_pt_gp_mem(struct t10_alua_tg_pt_gp_member *, - struct t10_alua_tg_pt_gp *); -extern ssize_t core_alua_show_tg_pt_gp_info(struct se_port *, char *); -extern ssize_t core_alua_store_tg_pt_gp_info(struct se_port *, const char *, +extern void target_detach_tg_pt_gp(struct se_lun *); +extern void target_attach_tg_pt_gp(struct se_lun *, struct t10_alua_tg_pt_gp *); +extern ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *, char *); +extern ssize_t core_alua_store_tg_pt_gp_info(struct se_lun *, const char *, size_t); extern ssize_t core_alua_show_access_type(struct t10_alua_tg_pt_gp *, char *); extern ssize_t core_alua_store_access_type(struct t10_alua_tg_pt_gp *, diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index e7b0430a0575..0b0de3647478 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -41,7 +41,6 @@ #include <target/target_core_backend.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "target_core_internal.h" @@ -51,15 +50,26 @@ #include "target_core_xcopy.h" #define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs) \ -static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \ +static void target_core_setup_##_name##_cit(struct target_backend *tb) \ { \ - struct target_backend_cits *tbc = &sa->tb_cits; \ - struct config_item_type *cit = &tbc->tb_##_name##_cit; \ + struct config_item_type *cit = &tb->tb_##_name##_cit; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = _attrs; \ - cit->ct_owner = sa->owner; \ + cit->ct_owner = tb->ops->owner; \ + pr_debug("Setup generic %s\n", __stringify(_name)); \ +} + +#define TB_CIT_SETUP_DRV(_name, _item_ops, _group_ops) \ +static void target_core_setup_##_name##_cit(struct target_backend *tb) \ +{ \ + struct config_item_type *cit = &tb->tb_##_name##_cit; \ + \ + cit->ct_item_ops = _item_ops; \ + cit->ct_group_ops = _group_ops; \ + cit->ct_attrs = tb->ops->tb_##_name##_attrs; \ + cit->ct_owner = tb->ops->owner; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } @@ -92,7 +102,7 @@ static ssize_t target_core_attr_show(struct config_item *item, char *page) { return sprintf(page, "Target Engine Core ConfigFS Infrastructure %s" - " on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_CONFIGFS_VERSION, + " on %s/%s on "UTS_RELEASE"\n", TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine); } @@ -116,7 +126,7 @@ static struct target_fabric_configfs *target_core_get_fabric( mutex_lock(&g_tf_lock); list_for_each_entry(tf, &g_tf_list, tf_list) { - if (!strcmp(tf->tf_name, name)) { + if (!strcmp(tf->tf_ops->name, name)) { atomic_inc(&tf->tf_access_cnt); mutex_unlock(&g_tf_lock); return tf; @@ -193,29 +203,24 @@ static struct config_group *target_core_register_fabric( return ERR_PTR(-EINVAL); } pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:" - " %s\n", tf->tf_name); + " %s\n", tf->tf_ops->name); /* * On a successful target_core_get_fabric() look, the returned * struct target_fabric_configfs *tf will contain a usage reference. */ pr_debug("Target_Core_ConfigFS: REGISTER tfc_wwn_cit -> %p\n", - &tf->tf_cit_tmpl.tfc_wwn_cit); + &tf->tf_wwn_cit); tf->tf_group.default_groups = tf->tf_default_groups; tf->tf_group.default_groups[0] = &tf->tf_disc_group; tf->tf_group.default_groups[1] = NULL; - config_group_init_type_name(&tf->tf_group, name, - &tf->tf_cit_tmpl.tfc_wwn_cit); + config_group_init_type_name(&tf->tf_group, name, &tf->tf_wwn_cit); config_group_init_type_name(&tf->tf_disc_group, "discovery_auth", - &tf->tf_cit_tmpl.tfc_discovery_cit); + &tf->tf_discovery_cit); pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:" " %s\n", tf->tf_group.cg_item.ci_name); - tf->tf_fabric = &tf->tf_group.cg_item; - pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric" - " for %s\n", name); - return &tf->tf_group; } @@ -236,13 +241,9 @@ static void target_core_deregister_fabric( " tf list\n", config_item_name(item)); pr_debug("Target_Core_ConfigFS: DEREGISTER -> located fabric:" - " %s\n", tf->tf_name); + " %s\n", tf->tf_ops->name); atomic_dec(&tf->tf_access_cnt); - pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing" - " tf->tf_fabric for %s\n", tf->tf_name); - tf->tf_fabric = NULL; - pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing ci" " %s\n", config_item_name(item)); @@ -318,10 +319,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->get_fabric_name()\n"); return -EINVAL; } - if (!tfo->get_fabric_proto_ident) { - pr_err("Missing tfo->get_fabric_proto_ident()\n"); - return -EINVAL; - } if (!tfo->tpg_get_wwn) { pr_err("Missing tfo->tpg_get_wwn()\n"); return -EINVAL; @@ -330,18 +327,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->tpg_get_tag()\n"); return -EINVAL; } - if (!tfo->tpg_get_default_depth) { - pr_err("Missing tfo->tpg_get_default_depth()\n"); - return -EINVAL; - } - if (!tfo->tpg_get_pr_transport_id) { - pr_err("Missing tfo->tpg_get_pr_transport_id()\n"); - return -EINVAL; - } - if (!tfo->tpg_get_pr_transport_id_len) { - pr_err("Missing tfo->tpg_get_pr_transport_id_len()\n"); - return -EINVAL; - } if (!tfo->tpg_check_demo_mode) { pr_err("Missing tfo->tpg_check_demo_mode()\n"); return -EINVAL; @@ -358,14 +343,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->tpg_check_prod_mode_write_protect()\n"); return -EINVAL; } - if (!tfo->tpg_alloc_fabric_acl) { - pr_err("Missing tfo->tpg_alloc_fabric_acl()\n"); - return -EINVAL; - } - if (!tfo->tpg_release_fabric_acl) { - pr_err("Missing tfo->tpg_release_fabric_acl()\n"); - return -EINVAL; - } if (!tfo->tpg_get_inst_index) { pr_err("Missing tfo->tpg_get_inst_index()\n"); return -EINVAL; @@ -398,10 +375,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->set_default_node_attributes()\n"); return -EINVAL; } - if (!tfo->get_task_tag) { - pr_err("Missing tfo->get_task_tag()\n"); - return -EINVAL; - } if (!tfo->get_cmd_state) { pr_err("Missing tfo->get_cmd_state()\n"); return -EINVAL; @@ -464,15 +437,7 @@ int target_register_template(const struct target_core_fabric_ops *fo) INIT_LIST_HEAD(&tf->tf_list); atomic_set(&tf->tf_access_cnt, 0); - - /* - * Setup the default generic struct config_item_type's (cits) in - * struct target_fabric_configfs->tf_cit_tmpl - */ - tf->tf_module = fo->module; - snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name); - - tf->tf_ops = *fo; + tf->tf_ops = fo; target_fabric_setup_cits(tf); mutex_lock(&g_tf_lock); @@ -489,7 +454,7 @@ void target_unregister_template(const struct target_core_fabric_ops *fo) mutex_lock(&g_tf_lock); list_for_each_entry(t, &g_tf_list, tf_list) { - if (!strcmp(t->tf_name, fo->name)) { + if (!strcmp(t->tf_ops->name, fo->name)) { BUG_ON(atomic_read(&t->tf_access_cnt)); list_del(&t->tf_list); kfree(t); @@ -505,16 +470,605 @@ EXPORT_SYMBOL(target_unregister_template); //############################################################################*/ /* Start functions for struct config_item_type tb_dev_attrib_cit */ +#define DEF_TB_DEV_ATTRIB_SHOW(_name) \ +static ssize_t show_##_name(struct se_dev_attrib *da, char *page) \ +{ \ + return snprintf(page, PAGE_SIZE, "%u\n", da->_name); \ +} + +DEF_TB_DEV_ATTRIB_SHOW(emulate_model_alias); +DEF_TB_DEV_ATTRIB_SHOW(emulate_dpo); +DEF_TB_DEV_ATTRIB_SHOW(emulate_fua_write); +DEF_TB_DEV_ATTRIB_SHOW(emulate_fua_read); +DEF_TB_DEV_ATTRIB_SHOW(emulate_write_cache); +DEF_TB_DEV_ATTRIB_SHOW(emulate_ua_intlck_ctrl); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tas); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tpu); +DEF_TB_DEV_ATTRIB_SHOW(emulate_tpws); +DEF_TB_DEV_ATTRIB_SHOW(emulate_caw); +DEF_TB_DEV_ATTRIB_SHOW(emulate_3pc); +DEF_TB_DEV_ATTRIB_SHOW(pi_prot_type); +DEF_TB_DEV_ATTRIB_SHOW(hw_pi_prot_type); +DEF_TB_DEV_ATTRIB_SHOW(pi_prot_format); +DEF_TB_DEV_ATTRIB_SHOW(enforce_pr_isids); +DEF_TB_DEV_ATTRIB_SHOW(is_nonrot); +DEF_TB_DEV_ATTRIB_SHOW(emulate_rest_reord); +DEF_TB_DEV_ATTRIB_SHOW(force_pr_aptpl); +DEF_TB_DEV_ATTRIB_SHOW(hw_block_size); +DEF_TB_DEV_ATTRIB_SHOW(block_size); +DEF_TB_DEV_ATTRIB_SHOW(hw_max_sectors); +DEF_TB_DEV_ATTRIB_SHOW(optimal_sectors); +DEF_TB_DEV_ATTRIB_SHOW(hw_queue_depth); +DEF_TB_DEV_ATTRIB_SHOW(queue_depth); +DEF_TB_DEV_ATTRIB_SHOW(max_unmap_lba_count); +DEF_TB_DEV_ATTRIB_SHOW(max_unmap_block_desc_count); +DEF_TB_DEV_ATTRIB_SHOW(unmap_granularity); +DEF_TB_DEV_ATTRIB_SHOW(unmap_granularity_alignment); +DEF_TB_DEV_ATTRIB_SHOW(max_write_same_len); + +#define DEF_TB_DEV_ATTRIB_STORE_U32(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ +{ \ + u32 val; \ + int ret; \ + \ + ret = kstrtou32(page, 0, &val); \ + if (ret < 0) \ + return ret; \ + da->_name = val; \ + return count; \ +} + +DEF_TB_DEV_ATTRIB_STORE_U32(max_unmap_lba_count); +DEF_TB_DEV_ATTRIB_STORE_U32(max_unmap_block_desc_count); +DEF_TB_DEV_ATTRIB_STORE_U32(unmap_granularity); +DEF_TB_DEV_ATTRIB_STORE_U32(unmap_granularity_alignment); +DEF_TB_DEV_ATTRIB_STORE_U32(max_write_same_len); + +#define DEF_TB_DEV_ATTRIB_STORE_BOOL(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ +{ \ + bool flag; \ + int ret; \ + \ + ret = strtobool(page, &flag); \ + if (ret < 0) \ + return ret; \ + da->_name = flag; \ + return count; \ +} + +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_fua_write); +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_caw); +DEF_TB_DEV_ATTRIB_STORE_BOOL(emulate_3pc); +DEF_TB_DEV_ATTRIB_STORE_BOOL(enforce_pr_isids); +DEF_TB_DEV_ATTRIB_STORE_BOOL(is_nonrot); + +#define DEF_TB_DEV_ATTRIB_STORE_STUB(_name) \ +static ssize_t store_##_name(struct se_dev_attrib *da, const char *page,\ + size_t count) \ +{ \ + printk_once(KERN_WARNING \ + "ignoring deprecated ##_name## attribute\n"); \ + return count; \ +} + +DEF_TB_DEV_ATTRIB_STORE_STUB(emulate_dpo); +DEF_TB_DEV_ATTRIB_STORE_STUB(emulate_fua_read); + +static void dev_set_t10_wwn_model_alias(struct se_device *dev) +{ + const char *configname; + + configname = config_item_name(&dev->dev_group.cg_item); + if (strlen(configname) >= 16) { + pr_warn("dev[%p]: Backstore name '%s' is too long for " + "INQUIRY_MODEL, truncating to 16 bytes\n", dev, + configname); + } + snprintf(&dev->t10_wwn.model[0], 16, "%s", configname); +} + +static ssize_t store_emulate_model_alias(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + if (dev->export_count) { + pr_err("dev[%p]: Unable to change model alias" + " while export_count is %d\n", + dev, dev->export_count); + return -EINVAL; + } + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag) { + dev_set_t10_wwn_model_alias(dev); + } else { + strncpy(&dev->t10_wwn.model[0], + dev->transport->inquiry_prod, 16); + } + da->emulate_model_alias = flag; + return count; +} + +static ssize_t store_emulate_write_cache(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag && da->da_dev->transport->get_write_cache) { + pr_err("emulate_write_cache not supported for this device\n"); + return -EINVAL; + } + + da->emulate_write_cache = flag; + pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_ua_intlck_ctrl(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (val != 0 && val != 1 && val != 2) { + pr_err("Illegal value %d\n", val); + return -EINVAL; + } + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device" + " UA_INTRLCK_CTRL while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + da->emulate_ua_intlck_ctrl = val; + pr_debug("dev[%p]: SE Device UA_INTRLCK_CTRL flag: %d\n", + da->da_dev, val); + return count; +} + +static ssize_t store_emulate_tas(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device TAS while" + " export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + da->emulate_tas = flag; + pr_debug("dev[%p]: SE Device TASK_ABORTED status bit: %s\n", + da->da_dev, flag ? "Enabled" : "Disabled"); + + return count; +} + +static ssize_t store_emulate_tpu(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + /* + * We expect this value to be non-zero when generic Block Layer + * Discard supported is detected iblock_create_virtdevice(). + */ + if (flag && !da->max_unmap_block_desc_count) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + da->emulate_tpu = flag; + pr_debug("dev[%p]: SE Device Thin Provisioning UNMAP bit: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_tpws(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + /* + * We expect this value to be non-zero when generic Block Layer + * Discard supported is detected iblock_create_virtdevice(). + */ + if (flag && !da->max_unmap_block_desc_count) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + da->emulate_tpws = flag; + pr_debug("dev[%p]: SE Device Thin Provisioning WRITE_SAME: %d\n", + da->da_dev, flag); + return count; +} + +static ssize_t store_pi_prot_type(struct se_dev_attrib *da, + const char *page, size_t count) +{ + int old_prot = da->pi_prot_type, ret; + struct se_device *dev = da->da_dev; + u32 flag; + + ret = kstrtou32(page, 0, &flag); + if (ret < 0) + return ret; + + if (flag != 0 && flag != 1 && flag != 2 && flag != 3) { + pr_err("Illegal value %d for pi_prot_type\n", flag); + return -EINVAL; + } + if (flag == 2) { + pr_err("DIF TYPE2 protection currently not supported\n"); + return -ENOSYS; + } + if (da->hw_pi_prot_type) { + pr_warn("DIF protection enabled on underlying hardware," + " ignoring\n"); + return count; + } + if (!dev->transport->init_prot || !dev->transport->free_prot) { + /* 0 is only allowed value for non-supporting backends */ + if (flag == 0) + return 0; + + pr_err("DIF protection not supported by backend: %s\n", + dev->transport->name); + return -ENOSYS; + } + if (!(dev->dev_flags & DF_CONFIGURED)) { + pr_err("DIF protection requires device to be configured\n"); + return -ENODEV; + } + if (dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device PROT type while" + " export_count is %d\n", dev, dev->export_count); + return -EINVAL; + } + + da->pi_prot_type = flag; + + if (flag && !old_prot) { + ret = dev->transport->init_prot(dev); + if (ret) { + da->pi_prot_type = old_prot; + return ret; + } + + } else if (!flag && old_prot) { + dev->transport->free_prot(dev); + } + + pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag); + return count; +} + +static ssize_t store_pi_prot_format(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (!flag) + return count; + + if (!dev->transport->format_prot) { + pr_err("DIF protection format not supported by backend %s\n", + dev->transport->name); + return -ENOSYS; + } + if (!(dev->dev_flags & DF_CONFIGURED)) { + pr_err("DIF protection format requires device to be configured\n"); + return -ENODEV; + } + if (dev->export_count) { + pr_err("dev[%p]: Unable to format SE Device PROT type while" + " export_count is %d\n", dev, dev->export_count); + return -EINVAL; + } + + ret = dev->transport->format_prot(dev); + if (ret) + return ret; + + pr_debug("dev[%p]: SE Device Protection Format complete\n", dev); + return count; +} + +static ssize_t store_force_pr_aptpl(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to set force_pr_aptpl while" + " export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + + da->force_pr_aptpl = flag; + pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", da->da_dev, flag); + return count; +} + +static ssize_t store_emulate_rest_reord(struct se_dev_attrib *da, + const char *page, size_t count) +{ + bool flag; + int ret; + + ret = strtobool(page, &flag); + if (ret < 0) + return ret; + + if (flag != 0) { + printk(KERN_ERR "dev[%p]: SE Device emulation of restricted" + " reordering not implemented\n", da->da_dev); + return -ENOSYS; + } + da->emulate_rest_reord = flag; + pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", + da->da_dev, flag); + return count; +} + +/* + * Note, this can only be called on unexported SE Device Object. + */ +static ssize_t store_queue_depth(struct se_dev_attrib *da, + const char *page, size_t count) +{ + struct se_device *dev = da->da_dev; + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device TCQ while" + " export_count is %d\n", + dev, dev->export_count); + return -EINVAL; + } + if (!val) { + pr_err("dev[%p]: Illegal ZERO value for queue_depth\n", dev); + return -EINVAL; + } + + if (val > dev->dev_attrib.queue_depth) { + if (val > dev->dev_attrib.hw_queue_depth) { + pr_err("dev[%p]: Passed queue_depth:" + " %u exceeds TCM/SE_Device MAX" + " TCQ: %u\n", dev, val, + dev->dev_attrib.hw_queue_depth); + return -EINVAL; + } + } + da->queue_depth = dev->queue_depth = val; + pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n", dev, val); + return count; +} + +static ssize_t store_optimal_sectors(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device" + " optimal_sectors while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + if (val > da->hw_max_sectors) { + pr_err("dev[%p]: Passed optimal_sectors %u cannot be" + " greater than hw_max_sectors: %u\n", + da->da_dev, val, da->hw_max_sectors); + return -EINVAL; + } + + da->optimal_sectors = val; + pr_debug("dev[%p]: SE Device optimal_sectors changed to %u\n", + da->da_dev, val); + return count; +} + +static ssize_t store_block_size(struct se_dev_attrib *da, + const char *page, size_t count) +{ + u32 val; + int ret; + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (da->da_dev->export_count) { + pr_err("dev[%p]: Unable to change SE Device block_size" + " while export_count is %d\n", + da->da_dev, da->da_dev->export_count); + return -EINVAL; + } + + if (val != 512 && val != 1024 && val != 2048 && val != 4096) { + pr_err("dev[%p]: Illegal value for block_device: %u" + " for SE device, must be 512, 1024, 2048 or 4096\n", + da->da_dev, val); + return -EINVAL; + } + + da->block_size = val; + if (da->max_bytes_per_io) + da->hw_max_sectors = da->max_bytes_per_io / val; + + pr_debug("dev[%p]: SE Device block_size changed to %u\n", + da->da_dev, val); + return count; +} + +CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); +#define TB_DEV_ATTR(_backend, _name, _mode) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + show_##_name, \ + store_##_name); + +#define TB_DEV_ATTR_RO(_backend, _name) \ +static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + show_##_name); + +TB_DEV_ATTR(target_core, emulate_model_alias, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_dpo, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_fua_write, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_fua_read, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_write_cache, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tas, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tpu, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_tpws, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_caw, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_3pc, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, pi_prot_type, S_IRUGO | S_IWUSR); +TB_DEV_ATTR_RO(target_core, hw_pi_prot_type); +TB_DEV_ATTR(target_core, pi_prot_format, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, enforce_pr_isids, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, is_nonrot, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, emulate_rest_reord, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, force_pr_aptpl, S_IRUGO | S_IWUSR) +TB_DEV_ATTR_RO(target_core, hw_block_size); +TB_DEV_ATTR(target_core, block_size, S_IRUGO | S_IWUSR) +TB_DEV_ATTR_RO(target_core, hw_max_sectors); +TB_DEV_ATTR(target_core, optimal_sectors, S_IRUGO | S_IWUSR); +TB_DEV_ATTR_RO(target_core, hw_queue_depth); +TB_DEV_ATTR(target_core, queue_depth, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_unmap_lba_count, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, unmap_granularity, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, unmap_granularity_alignment, S_IRUGO | S_IWUSR); +TB_DEV_ATTR(target_core, max_write_same_len, S_IRUGO | S_IWUSR); CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib); CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group); +/* + * dev_attrib attributes for devices using the target core SBC/SPC + * interpreter. Any backend using spc_parse_cdb should be using + * these. + */ +struct configfs_attribute *sbc_attrib_attrs[] = { + &target_core_dev_attrib_emulate_model_alias.attr, + &target_core_dev_attrib_emulate_dpo.attr, + &target_core_dev_attrib_emulate_fua_write.attr, + &target_core_dev_attrib_emulate_fua_read.attr, + &target_core_dev_attrib_emulate_write_cache.attr, + &target_core_dev_attrib_emulate_ua_intlck_ctrl.attr, + &target_core_dev_attrib_emulate_tas.attr, + &target_core_dev_attrib_emulate_tpu.attr, + &target_core_dev_attrib_emulate_tpws.attr, + &target_core_dev_attrib_emulate_caw.attr, + &target_core_dev_attrib_emulate_3pc.attr, + &target_core_dev_attrib_pi_prot_type.attr, + &target_core_dev_attrib_hw_pi_prot_type.attr, + &target_core_dev_attrib_pi_prot_format.attr, + &target_core_dev_attrib_enforce_pr_isids.attr, + &target_core_dev_attrib_is_nonrot.attr, + &target_core_dev_attrib_emulate_rest_reord.attr, + &target_core_dev_attrib_force_pr_aptpl.attr, + &target_core_dev_attrib_hw_block_size.attr, + &target_core_dev_attrib_block_size.attr, + &target_core_dev_attrib_hw_max_sectors.attr, + &target_core_dev_attrib_optimal_sectors.attr, + &target_core_dev_attrib_hw_queue_depth.attr, + &target_core_dev_attrib_queue_depth.attr, + &target_core_dev_attrib_max_unmap_lba_count.attr, + &target_core_dev_attrib_max_unmap_block_desc_count.attr, + &target_core_dev_attrib_unmap_granularity.attr, + &target_core_dev_attrib_unmap_granularity_alignment.attr, + &target_core_dev_attrib_max_write_same_len.attr, + NULL, +}; +EXPORT_SYMBOL(sbc_attrib_attrs); + +TB_DEV_ATTR_RO(target_pt, hw_pi_prot_type); +TB_DEV_ATTR_RO(target_pt, hw_block_size); +TB_DEV_ATTR_RO(target_pt, hw_max_sectors); +TB_DEV_ATTR_RO(target_pt, hw_queue_depth); + +/* + * Minimal dev_attrib attributes for devices passing through CDBs. + * In this case we only provide a few read-only attributes for + * backwards compatibility. + */ +struct configfs_attribute *passthrough_attrib_attrs[] = { + &target_pt_dev_attrib_hw_pi_prot_type.attr, + &target_pt_dev_attrib_hw_block_size.attr, + &target_pt_dev_attrib_hw_max_sectors.attr, + &target_pt_dev_attrib_hw_queue_depth.attr, + NULL, +}; +EXPORT_SYMBOL(passthrough_attrib_attrs); + static struct configfs_item_operations target_core_dev_attrib_ops = { .show_attribute = target_core_dev_attrib_attr_show, .store_attribute = target_core_dev_attrib_attr_store, }; -TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL); +TB_CIT_SETUP_DRV(dev_attrib, &target_core_dev_attrib_ops, NULL); /* End functions for struct config_item_type tb_dev_attrib_cit */ @@ -862,7 +1416,6 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( struct se_device *dev, char *page) { struct se_node_acl *se_nacl; - struct se_lun *lun; struct se_portal_group *se_tpg; struct t10_pr_registration *pr_reg; const struct target_core_fabric_ops *tfo; @@ -877,7 +1430,6 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( se_nacl = pr_reg->pr_reg_nacl; se_tpg = se_nacl->se_tpg; - lun = pr_reg->pr_reg_tg_pt_lun; tfo = se_tpg->se_tpg_tfo; len += sprintf(page+len, "SPC-3 Reservation: %s" @@ -885,9 +1437,9 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port( tfo->tpg_get_wwn(se_tpg)); len += sprintf(page+len, "SPC-3 Reservation: Relative Port" " Identifier Tag: %hu %s Portal Group Tag: %hu" - " %s Logical Unit: %u\n", lun->lun_sep->sep_rtpi, + " %s Logical Unit: %llu\n", pr_reg->tg_pt_sep_rtpi, tfo->get_fabric_name(), tfo->tpg_get_tag(se_tpg), - tfo->get_fabric_name(), lun->unpacked_lun); + tfo->get_fabric_name(), pr_reg->pr_aptpl_target_lun); out_unlock: spin_unlock(&dev->dev_reservation_lock); @@ -1012,12 +1564,12 @@ static match_table_t tokens = { {Opt_res_type, "res_type=%d"}, {Opt_res_scope, "res_scope=%d"}, {Opt_res_all_tg_pt, "res_all_tg_pt=%d"}, - {Opt_mapped_lun, "mapped_lun=%d"}, + {Opt_mapped_lun, "mapped_lun=%lld"}, {Opt_target_fabric, "target_fabric=%s"}, {Opt_target_node, "target_node=%s"}, {Opt_tpgt, "tpgt=%d"}, {Opt_port_rtpi, "port_rtpi=%d"}, - {Opt_target_lun, "target_lun=%d"}, + {Opt_target_lun, "target_lun=%lld"}, {Opt_err, NULL} }; @@ -1032,10 +1584,10 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( substring_t args[MAX_OPT_ARGS]; unsigned long long tmp_ll; u64 sa_res_key = 0; - u32 mapped_lun = 0, target_lun = 0; + u64 mapped_lun = 0, target_lun = 0; int ret = -1, res_holder = 0, all_tg_pt = 0, arg, token; - u16 port_rpti = 0, tpgt = 0; - u8 type = 0, scope; + u16 tpgt = 0; + u8 type = 0; if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) return 0; @@ -1115,7 +1667,6 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_res_scope: match_int(args, &arg); - scope = (u8)arg; break; case Opt_res_all_tg_pt: match_int(args, &arg); @@ -1123,7 +1674,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_mapped_lun: match_int(args, &arg); - mapped_lun = (u32)arg; + mapped_lun = (u64)arg; break; /* * PR APTPL Metadata for Target Port @@ -1155,11 +1706,10 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( break; case Opt_port_rtpi: match_int(args, &arg); - port_rpti = (u16)arg; break; case Opt_target_lun: match_int(args, &arg); - target_lun = (u32)arg; + target_lun = (u64)arg; break; default: break; @@ -1223,13 +1773,13 @@ TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs); static ssize_t target_core_show_dev_info(void *p, char *page) { struct se_device *dev = p; - struct se_subsystem_api *t = dev->transport; int bl = 0; ssize_t read_bytes = 0; transport_dump_dev_state(dev, page, &bl); read_bytes += bl; - read_bytes += t->show_configfs_dev_params(dev, page+read_bytes); + read_bytes += dev->transport->show_configfs_dev_params(dev, + page+read_bytes); return read_bytes; } @@ -1247,9 +1797,8 @@ static ssize_t target_core_store_dev_control( size_t count) { struct se_device *dev = p; - struct se_subsystem_api *t = dev->transport; - return t->set_configfs_dev_params(dev, page, count); + return dev->transport->set_configfs_dev_params(dev, page, count); } static struct target_core_configfs_attribute target_core_attr_dev_control = { @@ -2339,21 +2888,16 @@ static ssize_t target_core_alua_tg_pt_gp_show_attr_members( struct t10_alua_tg_pt_gp *tg_pt_gp, char *page) { - struct se_port *port; - struct se_portal_group *tpg; struct se_lun *lun; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; ssize_t len = 0, cur_len; unsigned char buf[TG_PT_GROUP_NAME_BUF]; memset(buf, 0, TG_PT_GROUP_NAME_BUF); spin_lock(&tg_pt_gp->tg_pt_gp_lock); - list_for_each_entry(tg_pt_gp_mem, &tg_pt_gp->tg_pt_gp_mem_list, - tg_pt_gp_mem_list) { - port = tg_pt_gp_mem->tg_pt; - tpg = port->sep_tpg; - lun = port->sep_lun; + list_for_each_entry(lun, &tg_pt_gp->tg_pt_gp_lun_list, + lun_tg_pt_gp_link) { + struct se_portal_group *tpg = lun->lun_tpg; cur_len = snprintf(buf, TG_PT_GROUP_NAME_BUF, "%s/%s/tpgt_%hu" "/%s\n", tpg->se_tpg_tfo->get_fabric_name(), @@ -2526,9 +3070,9 @@ static struct config_group *target_core_make_subdev( const char *name) { struct t10_alua_tg_pt_gp *tg_pt_gp; - struct se_subsystem_api *t; struct config_item *hba_ci = &group->cg_item; struct se_hba *hba = item_to_hba(hba_ci); + struct target_backend *tb = hba->backend; struct se_device *dev; struct config_group *dev_cg = NULL, *tg_pt_gp_cg = NULL; struct config_group *dev_stat_grp = NULL; @@ -2537,10 +3081,6 @@ static struct config_group *target_core_make_subdev( ret = mutex_lock_interruptible(&hba->hba_access_mutex); if (ret) return ERR_PTR(ret); - /* - * Locate the struct se_subsystem_api from parent's struct se_hba. - */ - t = hba->transport; dev = target_alloc_device(hba, name); if (!dev) @@ -2553,17 +3093,17 @@ static struct config_group *target_core_make_subdev( if (!dev_cg->default_groups) goto out_free_device; - config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit); + config_group_init_type_name(dev_cg, name, &tb->tb_dev_cit); config_group_init_type_name(&dev->dev_attrib.da_group, "attrib", - &t->tb_cits.tb_dev_attrib_cit); + &tb->tb_dev_attrib_cit); config_group_init_type_name(&dev->dev_pr_group, "pr", - &t->tb_cits.tb_dev_pr_cit); + &tb->tb_dev_pr_cit); config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn", - &t->tb_cits.tb_dev_wwn_cit); + &tb->tb_dev_wwn_cit); config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group, - "alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit); + "alua", &tb->tb_dev_alua_tg_pt_gps_cit); config_group_init_type_name(&dev->dev_stat_grps.stat_group, - "statistics", &t->tb_cits.tb_dev_stat_cit); + "statistics", &tb->tb_dev_stat_cit); dev_cg->default_groups[0] = &dev->dev_attrib.da_group; dev_cg->default_groups[1] = &dev->dev_pr_group; @@ -2693,8 +3233,8 @@ static ssize_t target_core_hba_show_attr_hba_info( char *page) { return sprintf(page, "HBA Index: %d plugin: %s version: %s\n", - hba->hba_id, hba->transport->name, - TARGET_CORE_CONFIGFS_VERSION); + hba->hba_id, hba->backend->ops->name, + TARGET_CORE_VERSION); } SE_HBA_ATTR_RO(hba_info); @@ -2713,11 +3253,10 @@ static ssize_t target_core_hba_show_attr_hba_mode(struct se_hba *hba, static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, const char *page, size_t count) { - struct se_subsystem_api *transport = hba->transport; unsigned long mode_flag; int ret; - if (transport->pmode_enable_hba == NULL) + if (hba->backend->ops->pmode_enable_hba == NULL) return -EINVAL; ret = kstrtoul(page, 0, &mode_flag); @@ -2731,7 +3270,7 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, return -EINVAL; } - ret = transport->pmode_enable_hba(hba, mode_flag); + ret = hba->backend->ops->pmode_enable_hba(hba, mode_flag); if (ret < 0) return -EINVAL; if (ret > 0) @@ -2857,16 +3396,15 @@ static struct config_item_type target_core_cit = { /* Stop functions for struct config_item_type target_core_hba_cit */ -void target_core_setup_sub_cits(struct se_subsystem_api *sa) +void target_setup_backend_cits(struct target_backend *tb) { - target_core_setup_dev_cit(sa); - target_core_setup_dev_attrib_cit(sa); - target_core_setup_dev_pr_cit(sa); - target_core_setup_dev_wwn_cit(sa); - target_core_setup_dev_alua_tg_pt_gps_cit(sa); - target_core_setup_dev_stat_cit(sa); + target_core_setup_dev_cit(tb); + target_core_setup_dev_attrib_cit(tb); + target_core_setup_dev_pr_cit(tb); + target_core_setup_dev_wwn_cit(tb); + target_core_setup_dev_alua_tg_pt_gps_cit(tb); + target_core_setup_dev_stat_cit(tb); } -EXPORT_SYMBOL(target_core_setup_sub_cits); static int __init target_core_init_configfs(void) { @@ -2968,7 +3506,7 @@ static int __init target_core_init_configfs(void) goto out_global; } pr_debug("TARGET_CORE[0]: Initialized ConfigFS Fabric" - " Infrastructure: "TARGET_CORE_CONFIGFS_VERSION" on %s/%s" + " Infrastructure: "TARGET_CORE_VERSION" on %s/%s" " on "UTS_RELEASE"\n", utsname()->sysname, utsname()->machine); /* * Register built-in RAMDISK subsystem logic for virtual LUN 0 diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 417f88b498c7..09e682b1c549 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -56,40 +56,37 @@ static struct se_hba *lun0_hba; struct se_device *g_lun0_dev; sense_reason_t -transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) +transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) { struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; - struct se_device *dev; - unsigned long flags; - - if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return TCM_NON_EXISTENT_LUN; - - spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags); - se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun]; - if (se_cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - struct se_dev_entry *deve = se_cmd->se_deve; + struct se_node_acl *nacl = se_sess->se_node_acl; + struct se_dev_entry *deve; - deve->total_cmds++; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (deve) { + atomic_long_inc(&deve->total_cmds); if ((se_cmd->data_direction == DMA_TO_DEVICE) && (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY)) { pr_err("TARGET_CORE[%s]: Detected WRITE_PROTECTED LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); return TCM_WRITE_PROTECTED; } if (se_cmd->data_direction == DMA_TO_DEVICE) - deve->write_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, + &deve->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - deve->read_bytes += se_cmd->data_length; + atomic_long_add(se_cmd->data_length, + &deve->read_bytes); - se_lun = deve->se_lun; - se_cmd->se_lun = deve->se_lun; + se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -97,7 +94,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; } - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); if (!se_lun) { /* @@ -107,7 +104,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) */ if (unpacked_lun != 0) { pr_err("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); return TCM_NON_EXISTENT_LUN; @@ -119,64 +116,66 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) (se_cmd->data_direction != DMA_NONE)) return TCM_WRITE_PROTECTED; - se_lun = &se_sess->se_tpg->tpg_virt_lun0; - se_cmd->se_lun = &se_sess->se_tpg->tpg_virt_lun0; + se_lun = se_sess->se_tpg->tpg_virt_lun0; + se_cmd->se_lun = se_sess->se_tpg->tpg_virt_lun0; se_cmd->orig_fe_lun = 0; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; } + /* + * RCU reference protected by percpu se_lun->lun_ref taken above that + * must drop to zero (including initial reference) before this se_lun + * pointer can be kfree_rcu() by the final se_lun->lun_group put via + * target_core_fabric_configfs.c:target_fabric_port_release + */ + se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); + atomic_long_inc(&se_cmd->se_dev->num_cmds); - /* Directly associate cmd with se_dev */ - se_cmd->se_dev = se_lun->lun_se_dev; - - dev = se_lun->lun_se_dev; - atomic_long_inc(&dev->num_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - atomic_long_add(se_cmd->data_length, &dev->write_bytes); + atomic_long_add(se_cmd->data_length, + &se_cmd->se_dev->write_bytes); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - atomic_long_add(se_cmd->data_length, &dev->read_bytes); + atomic_long_add(se_cmd->data_length, + &se_cmd->se_dev->read_bytes); return 0; } EXPORT_SYMBOL(transport_lookup_cmd_lun); -int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) +int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) { struct se_dev_entry *deve; struct se_lun *se_lun = NULL; struct se_session *se_sess = se_cmd->se_sess; + struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; unsigned long flags; - if (unpacked_lun >= TRANSPORT_MAX_LUNS_PER_TPG) - return -ENODEV; - - spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags); - se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun]; - deve = se_cmd->se_deve; - - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - se_tmr->tmr_lun = deve->se_lun; - se_cmd->se_lun = deve->se_lun; - se_lun = deve->se_lun; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (deve) { + se_tmr->tmr_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; } - spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags); + rcu_read_unlock(); if (!se_lun) { pr_debug("TARGET_CORE[%s]: Detected NON_EXISTENT_LUN" - " Access for 0x%08x\n", + " Access for 0x%08llx\n", se_cmd->se_tfo->get_fabric_name(), unpacked_lun); return -ENODEV; } - - /* Directly associate cmd with se_dev */ - se_cmd->se_dev = se_lun->lun_se_dev; - se_tmr->tmr_dev = se_lun->lun_se_dev; + /* + * XXX: Add percpu se_lun->lun_ref reference count for TMR + */ + se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); + se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); @@ -186,9 +185,24 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) } EXPORT_SYMBOL(transport_lookup_tmr_lun); +bool target_lun_is_rdonly(struct se_cmd *cmd) +{ + struct se_session *se_sess = cmd->se_sess; + struct se_dev_entry *deve; + bool ret; + + rcu_read_lock(); + deve = target_nacl_find_deve(se_sess->se_node_acl, cmd->orig_fe_lun); + ret = (deve && deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(target_lun_is_rdonly); + /* * This function is called from core_scsi3_emulate_pro_register_and_move() - * and core_scsi3_decode_spec_i_port(), and will increment &deve->pr_ref_count + * and core_scsi3_decode_spec_i_port(), and will increment &deve->pr_kref * when a matching rtpi is found. */ struct se_dev_entry *core_get_se_deve_from_rtpi( @@ -197,231 +211,238 @@ struct se_dev_entry *core_get_se_deve_from_rtpi( { struct se_dev_entry *deve; struct se_lun *lun; - struct se_port *port; struct se_portal_group *tpg = nacl->se_tpg; - u32 i; - - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - lun = deve->se_lun; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + lun = rcu_dereference(deve->se_lun); if (!lun) { pr_err("%s device entries device pointer is" " NULL, but Initiator has access.\n", tpg->se_tpg_tfo->get_fabric_name()); continue; } - port = lun->lun_sep; - if (!port) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - if (port->sep_rtpi != rtpi) + if (lun->lun_rtpi != rtpi) continue; - atomic_inc_mb(&deve->pr_ref_count); - spin_unlock_irq(&nacl->device_list_lock); + kref_get(&deve->pr_kref); + rcu_read_unlock(); return deve; } - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return NULL; } -int core_free_device_list_for_node( +void core_free_device_list_for_node( struct se_node_acl *nacl, struct se_portal_group *tpg) { struct se_dev_entry *deve; - struct se_lun *lun; - u32 i; - - if (!nacl->device_list) - return 0; - - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - if (!deve->se_lun) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - lun = deve->se_lun; - spin_unlock_irq(&nacl->device_list_lock); - core_disable_device_list_for_node(lun, NULL, deve->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); - spin_lock_irq(&nacl->device_list_lock); + mutex_lock(&nacl->lun_entry_mutex); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_lun *lun = rcu_dereference_check(deve->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); + core_disable_device_list_for_node(lun, deve, nacl, tpg); } - spin_unlock_irq(&nacl->device_list_lock); - - array_free(nacl->device_list, TRANSPORT_MAX_LUNS_PER_TPG); - nacl->device_list = NULL; - - return 0; + mutex_unlock(&nacl->lun_entry_mutex); } void core_update_device_list_access( - u32 mapped_lun, + u64 mapped_lun, u32 lun_access, struct se_node_acl *nacl) { struct se_dev_entry *deve; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[mapped_lun]; - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + mutex_lock(&nacl->lun_entry_mutex); + deve = target_nacl_find_deve(nacl, mapped_lun); + if (deve) { + if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { + deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; + deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; + } else { + deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; + deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + } } - spin_unlock_irq(&nacl->device_list_lock); + mutex_unlock(&nacl->lun_entry_mutex); } -/* core_enable_device_list_for_node(): - * - * +/* + * Called with rcu_read_lock or nacl->device_list_lock held. */ +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *nacl, u64 mapped_lun) +{ + struct se_dev_entry *deve; + + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) + if (deve->mapped_lun == mapped_lun) + return deve; + + return NULL; +} +EXPORT_SYMBOL(target_nacl_find_deve); + +void target_pr_kref_release(struct kref *kref) +{ + struct se_dev_entry *deve = container_of(kref, struct se_dev_entry, + pr_kref); + complete(&deve->pr_comp); +} + +static void +target_luns_data_has_changed(struct se_node_acl *nacl, struct se_dev_entry *new, + bool skip_new) +{ + struct se_dev_entry *tmp; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, &nacl->lun_entry_hlist, link) { + if (skip_new && tmp == new) + continue; + core_scsi3_ua_allocate(tmp, 0x3F, + ASCQ_3FH_REPORTED_LUNS_DATA_HAS_CHANGED); + } + rcu_read_unlock(); +} + int core_enable_device_list_for_node( struct se_lun *lun, struct se_lun_acl *lun_acl, - u32 mapped_lun, + u64 mapped_lun, u32 lun_access, struct se_node_acl *nacl, struct se_portal_group *tpg) { - struct se_port *port = lun->lun_sep; - struct se_dev_entry *deve; - - spin_lock_irq(&nacl->device_list_lock); - - deve = nacl->device_list[mapped_lun]; - - /* - * Check if the call is handling demo mode -> explicit LUN ACL - * transition. This transition must be for the same struct se_lun - * + mapped_lun that was setup in demo mode.. - */ - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { - if (deve->se_lun_acl != NULL) { - pr_err("struct se_dev_entry->se_lun_acl" - " already set for demo mode -> explicit" - " LUN ACL transition\n"); - spin_unlock_irq(&nacl->device_list_lock); + struct se_dev_entry *orig, *new; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) { + pr_err("Unable to allocate se_dev_entry memory\n"); + return -ENOMEM; + } + + atomic_set(&new->ua_count, 0); + spin_lock_init(&new->ua_lock); + INIT_LIST_HEAD(&new->ua_list); + INIT_LIST_HEAD(&new->lun_link); + + new->mapped_lun = mapped_lun; + kref_init(&new->pr_kref); + init_completion(&new->pr_comp); + + if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) + new->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; + else + new->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + + new->creation_time = get_jiffies_64(); + new->attach_count++; + + mutex_lock(&nacl->lun_entry_mutex); + orig = target_nacl_find_deve(nacl, mapped_lun); + if (orig && orig->se_lun) { + struct se_lun *orig_lun = rcu_dereference_check(orig->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); + + if (orig_lun != lun) { + pr_err("Existing orig->se_lun doesn't match new lun" + " for dynamic -> explicit NodeACL conversion:" + " %s\n", nacl->initiatorname); + mutex_unlock(&nacl->lun_entry_mutex); + kfree(new); return -EINVAL; } - if (deve->se_lun != lun) { - pr_err("struct se_dev_entry->se_lun does" - " match passed struct se_lun for demo mode" - " -> explicit LUN ACL transition\n"); - spin_unlock_irq(&nacl->device_list_lock); - return -EINVAL; - } - deve->se_lun_acl = lun_acl; + BUG_ON(orig->se_lun_acl != NULL); - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; - } + rcu_assign_pointer(new->se_lun, lun); + rcu_assign_pointer(new->se_lun_acl, lun_acl); + hlist_del_rcu(&orig->link); + hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); + mutex_unlock(&nacl->lun_entry_mutex); - spin_unlock_irq(&nacl->device_list_lock); - return 0; - } + spin_lock(&lun->lun_deve_lock); + list_del(&orig->lun_link); + list_add_tail(&new->lun_link, &lun->lun_deve_list); + spin_unlock(&lun->lun_deve_lock); + + kref_put(&orig->pr_kref, target_pr_kref_release); + wait_for_completion(&orig->pr_comp); - deve->se_lun = lun; - deve->se_lun_acl = lun_acl; - deve->mapped_lun = mapped_lun; - deve->lun_flags |= TRANSPORT_LUNFLAGS_INITIATOR_ACCESS; - - if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE; - } else { - deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE; - deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY; + target_luns_data_has_changed(nacl, new, true); + kfree_rcu(orig, rcu_head); + return 0; } - deve->creation_time = get_jiffies_64(); - deve->attach_count++; - spin_unlock_irq(&nacl->device_list_lock); + rcu_assign_pointer(new->se_lun, lun); + rcu_assign_pointer(new->se_lun_acl, lun_acl); + hlist_add_head_rcu(&new->link, &nacl->lun_entry_hlist); + mutex_unlock(&nacl->lun_entry_mutex); - spin_lock_bh(&port->sep_alua_lock); - list_add_tail(&deve->alua_port_list, &port->sep_alua_list); - spin_unlock_bh(&port->sep_alua_lock); + spin_lock(&lun->lun_deve_lock); + list_add_tail(&new->lun_link, &lun->lun_deve_list); + spin_unlock(&lun->lun_deve_lock); + target_luns_data_has_changed(nacl, new, true); return 0; } -/* core_disable_device_list_for_node(): - * - * +/* + * Called with se_node_acl->lun_entry_mutex held. */ -int core_disable_device_list_for_node( +void core_disable_device_list_for_node( struct se_lun *lun, - struct se_lun_acl *lun_acl, - u32 mapped_lun, - u32 lun_access, + struct se_dev_entry *orig, struct se_node_acl *nacl, struct se_portal_group *tpg) { - struct se_port *port = lun->lun_sep; - struct se_dev_entry *deve = nacl->device_list[mapped_lun]; - + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); /* * If the MappedLUN entry is being disabled, the entry in - * port->sep_alua_list must be removed now before clearing the + * lun->lun_deve_list must be removed now before clearing the * struct se_dev_entry pointers below as logic in * core_alua_do_transition_tg_pt() depends on these being present. * * deve->se_lun_acl will be NULL for demo-mode created LUNs * that have not been explicitly converted to MappedLUNs -> - * struct se_lun_acl, but we remove deve->alua_port_list from - * port->sep_alua_list. This also means that active UAs and + * struct se_lun_acl, but we remove deve->lun_link from + * lun->lun_deve_list. This also means that active UAs and * NodeACL context specific PR metadata for demo-mode * MappedLUN *deve will be released below.. */ - spin_lock_bh(&port->sep_alua_lock); - list_del(&deve->alua_port_list); - spin_unlock_bh(&port->sep_alua_lock); + spin_lock(&lun->lun_deve_lock); + list_del(&orig->lun_link); + spin_unlock(&lun->lun_deve_lock); /* - * Wait for any in process SPEC_I_PT=1 or REGISTER_AND_MOVE - * PR operation to complete. + * Disable struct se_dev_entry LUN ACL mapping */ - while (atomic_read(&deve->pr_ref_count) != 0) - cpu_relax(); - - spin_lock_irq(&nacl->device_list_lock); + core_scsi3_ua_release_all(orig); + + hlist_del_rcu(&orig->link); + clear_bit(DEF_PR_REG_ACTIVE, &orig->deve_flags); + rcu_assign_pointer(orig->se_lun, NULL); + rcu_assign_pointer(orig->se_lun_acl, NULL); + orig->lun_flags = 0; + orig->creation_time = 0; + orig->attach_count--; /* - * Disable struct se_dev_entry LUN ACL mapping + * Before firing off RCU callback, wait for any in process SPEC_I_PT=1 + * or REGISTER_AND_MOVE PR operation to complete. */ - core_scsi3_ua_release_all(deve); - deve->se_lun = NULL; - deve->se_lun_acl = NULL; - deve->lun_flags = 0; - deve->creation_time = 0; - deve->attach_count--; - spin_unlock_irq(&nacl->device_list_lock); - - core_scsi3_free_pr_reg_from_nacl(lun->lun_se_dev, nacl); - return 0; + kref_put(&orig->pr_kref, target_pr_kref_release); + wait_for_completion(&orig->pr_comp); + + kfree_rcu(orig, rcu_head); + + core_scsi3_free_pr_reg_from_nacl(dev, nacl); + target_luns_data_has_changed(nacl, NULL, false); } /* core_clear_lun_from_tpg(): @@ -432,53 +453,35 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) { struct se_node_acl *nacl; struct se_dev_entry *deve; - u32 i; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) { - spin_unlock_irq(&tpg->acl_node_lock); - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - if (lun != deve->se_lun) - continue; - spin_unlock_irq(&nacl->device_list_lock); + mutex_lock(&nacl->lun_entry_mutex); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_lun *tmp_lun = rcu_dereference_check(deve->se_lun, + lockdep_is_held(&nacl->lun_entry_mutex)); - core_disable_device_list_for_node(lun, NULL, - deve->mapped_lun, TRANSPORT_LUNFLAGS_NO_ACCESS, - nacl, tpg); + if (lun != tmp_lun) + continue; - spin_lock_irq(&nacl->device_list_lock); + core_disable_device_list_for_node(lun, deve, nacl, tpg); } - spin_unlock_irq(&nacl->device_list_lock); - - spin_lock_irq(&tpg->acl_node_lock); + mutex_unlock(&nacl->lun_entry_mutex); } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); } -static struct se_port *core_alloc_port(struct se_device *dev) +int core_alloc_rtpi(struct se_lun *lun, struct se_device *dev) { - struct se_port *port, *port_tmp; - - port = kzalloc(sizeof(struct se_port), GFP_KERNEL); - if (!port) { - pr_err("Unable to allocate struct se_port\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&port->sep_alua_list); - INIT_LIST_HEAD(&port->sep_list); - atomic_set(&port->sep_tg_pt_secondary_offline, 0); - spin_lock_init(&port->sep_alua_lock); - mutex_init(&port->sep_tg_pt_md_mutex); + struct se_lun *tmp; spin_lock(&dev->se_port_lock); - if (dev->dev_port_count == 0x0000ffff) { + if (dev->export_count == 0x0000ffff) { pr_warn("Reached dev->dev_port_count ==" " 0x0000ffff\n"); spin_unlock(&dev->se_port_lock); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } again: /* @@ -493,133 +496,23 @@ again: * 2h Relative port 2, historically known as port B * 3h to FFFFh Relative port 3 through 65 535 */ - port->sep_rtpi = dev->dev_rpti_counter++; - if (!port->sep_rtpi) + lun->lun_rtpi = dev->dev_rpti_counter++; + if (!lun->lun_rtpi) goto again; - list_for_each_entry(port_tmp, &dev->dev_sep_list, sep_list) { + list_for_each_entry(tmp, &dev->dev_sep_list, lun_dev_link) { /* * Make sure RELATIVE TARGET PORT IDENTIFIER is unique * for 16-bit wrap.. */ - if (port->sep_rtpi == port_tmp->sep_rtpi) + if (lun->lun_rtpi == tmp->lun_rtpi) goto again; } spin_unlock(&dev->se_port_lock); - return port; -} - -static void core_export_port( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_port *port, - struct se_lun *lun) -{ - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem = NULL; - - spin_lock(&dev->se_port_lock); - spin_lock(&lun->lun_sep_lock); - port->sep_tpg = tpg; - port->sep_lun = lun; - lun->lun_sep = port; - spin_unlock(&lun->lun_sep_lock); - - list_add_tail(&port->sep_list, &dev->dev_sep_list); - spin_unlock(&dev->se_port_lock); - - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && - !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { - tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port); - if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) { - pr_err("Unable to allocate t10_alua_tg_pt" - "_gp_member_t\n"); - return; - } - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - __core_alua_attach_tg_pt_gp_mem(tg_pt_gp_mem, - dev->t10_alua.default_tg_pt_gp); - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - pr_debug("%s/%s: Adding to default ALUA Target Port" - " Group: alua/default_tg_pt_gp\n", - dev->transport->name, tpg->se_tpg_tfo->get_fabric_name()); - } - - dev->dev_port_count++; - port->sep_index = port->sep_rtpi; /* RELATIVE TARGET PORT IDENTIFIER */ -} - -/* - * Called with struct se_device->se_port_lock spinlock held. - */ -static void core_release_port(struct se_device *dev, struct se_port *port) - __releases(&dev->se_port_lock) __acquires(&dev->se_port_lock) -{ - /* - * Wait for any port reference for PR ALL_TG_PT=1 operation - * to complete in __core_scsi3_alloc_registration() - */ - spin_unlock(&dev->se_port_lock); - if (atomic_read(&port->sep_tg_pt_ref_cnt)) - cpu_relax(); - spin_lock(&dev->se_port_lock); - - core_alua_free_tg_pt_gp_mem(port); - - list_del(&port->sep_list); - dev->dev_port_count--; - kfree(port); -} - -int core_dev_export( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_lun *lun) -{ - struct se_hba *hba = dev->se_hba; - struct se_port *port; - - port = core_alloc_port(dev); - if (IS_ERR(port)) - return PTR_ERR(port); - - lun->lun_se_dev = dev; - - spin_lock(&hba->device_lock); - dev->export_count++; - spin_unlock(&hba->device_lock); - - core_export_port(dev, tpg, port, lun); return 0; } -void core_dev_unexport( - struct se_device *dev, - struct se_portal_group *tpg, - struct se_lun *lun) -{ - struct se_hba *hba = dev->se_hba; - struct se_port *port = lun->lun_sep; - - spin_lock(&lun->lun_sep_lock); - if (lun->lun_se_dev == NULL) { - spin_unlock(&lun->lun_sep_lock); - return; - } - spin_unlock(&lun->lun_sep_lock); - - spin_lock(&dev->se_port_lock); - core_release_port(dev, port); - spin_unlock(&dev->se_port_lock); - - spin_lock(&hba->device_lock); - dev->export_count--; - spin_unlock(&hba->device_lock); - - lun->lun_sep = NULL; - lun->lun_se_dev = NULL; -} - static void se_release_vpd_for_dev(struct se_device *dev) { struct t10_vpd *vpd, *vpd_tmp; @@ -651,556 +544,19 @@ static u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) return aligned_max_sectors; } -bool se_dev_check_wce(struct se_device *dev) -{ - bool wce = false; - - if (dev->transport->get_write_cache) - wce = dev->transport->get_write_cache(dev); - else if (dev->dev_attrib.emulate_write_cache > 0) - wce = true; - - return wce; -} - -int se_dev_set_max_unmap_lba_count( - struct se_device *dev, - u32 max_unmap_lba_count) -{ - dev->dev_attrib.max_unmap_lba_count = max_unmap_lba_count; - pr_debug("dev[%p]: Set max_unmap_lba_count: %u\n", - dev, dev->dev_attrib.max_unmap_lba_count); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count); - -int se_dev_set_max_unmap_block_desc_count( - struct se_device *dev, - u32 max_unmap_block_desc_count) -{ - dev->dev_attrib.max_unmap_block_desc_count = - max_unmap_block_desc_count; - pr_debug("dev[%p]: Set max_unmap_block_desc_count: %u\n", - dev, dev->dev_attrib.max_unmap_block_desc_count); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count); - -int se_dev_set_unmap_granularity( - struct se_device *dev, - u32 unmap_granularity) -{ - dev->dev_attrib.unmap_granularity = unmap_granularity; - pr_debug("dev[%p]: Set unmap_granularity: %u\n", - dev, dev->dev_attrib.unmap_granularity); - return 0; -} -EXPORT_SYMBOL(se_dev_set_unmap_granularity); - -int se_dev_set_unmap_granularity_alignment( - struct se_device *dev, - u32 unmap_granularity_alignment) -{ - dev->dev_attrib.unmap_granularity_alignment = unmap_granularity_alignment; - pr_debug("dev[%p]: Set unmap_granularity_alignment: %u\n", - dev, dev->dev_attrib.unmap_granularity_alignment); - return 0; -} -EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment); - -int se_dev_set_max_write_same_len( - struct se_device *dev, - u32 max_write_same_len) -{ - dev->dev_attrib.max_write_same_len = max_write_same_len; - pr_debug("dev[%p]: Set max_write_same_len: %u\n", - dev, dev->dev_attrib.max_write_same_len); - return 0; -} -EXPORT_SYMBOL(se_dev_set_max_write_same_len); - -static void dev_set_t10_wwn_model_alias(struct se_device *dev) -{ - const char *configname; - - configname = config_item_name(&dev->dev_group.cg_item); - if (strlen(configname) >= 16) { - pr_warn("dev[%p]: Backstore name '%s' is too long for " - "INQUIRY_MODEL, truncating to 16 bytes\n", dev, - configname); - } - snprintf(&dev->t10_wwn.model[0], 16, "%s", configname); -} - -int se_dev_set_emulate_model_alias(struct se_device *dev, int flag) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change model alias" - " while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - dev_set_t10_wwn_model_alias(dev); - } else { - strncpy(&dev->t10_wwn.model[0], - dev->transport->inquiry_prod, 16); - } - dev->dev_attrib.emulate_model_alias = flag; - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_model_alias); - -int se_dev_set_emulate_dpo(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - pr_err("dpo_emulated not supported\n"); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_dpo); - -int se_dev_set_emulate_fua_write(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - if (flag && - dev->transport->get_write_cache) { - pr_warn("emulate_fua_write not supported for this device, ignoring\n"); - return 0; - } - if (dev->export_count) { - pr_err("emulate_fua_write cannot be changed with active" - " exports: %d\n", dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_fua_write = flag; - pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n", - dev, dev->dev_attrib.emulate_fua_write); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_fua_write); - -int se_dev_set_emulate_fua_read(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (flag) { - pr_err("ua read emulated not supported\n"); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_fua_read); - -int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - if (flag && - dev->transport->get_write_cache) { - pr_err("emulate_write_cache not supported for this device\n"); - return -EINVAL; - } - if (dev->export_count) { - pr_err("emulate_write_cache cannot be changed with active" - " exports: %d\n", dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_write_cache = flag; - pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n", - dev, dev->dev_attrib.emulate_write_cache); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_write_cache); - -int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1) && (flag != 2)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device" - " UA_INTRLCK_CTRL while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_ua_intlck_ctrl = flag; - pr_debug("dev[%p]: SE Device UA_INTRLCK_CTRL flag: %d\n", - dev, dev->dev_attrib.emulate_ua_intlck_ctrl); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl); - -int se_dev_set_emulate_tas(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device TAS while" - " export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - dev->dev_attrib.emulate_tas = flag; - pr_debug("dev[%p]: SE Device TASK_ABORTED status bit: %s\n", - dev, (dev->dev_attrib.emulate_tas) ? "Enabled" : "Disabled"); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tas); - -int se_dev_set_emulate_tpu(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - /* - * We expect this value to be non-zero when generic Block Layer - * Discard supported is detected iblock_create_virtdevice(). - */ - if (flag && !dev->dev_attrib.max_unmap_block_desc_count) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } - - dev->dev_attrib.emulate_tpu = flag; - pr_debug("dev[%p]: SE Device Thin Provisioning UNMAP bit: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tpu); - -int se_dev_set_emulate_tpws(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - /* - * We expect this value to be non-zero when generic Block Layer - * Discard supported is detected iblock_create_virtdevice(). - */ - if (flag && !dev->dev_attrib.max_unmap_block_desc_count) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } - - dev->dev_attrib.emulate_tpws = flag; - pr_debug("dev[%p]: SE Device Thin Provisioning WRITE_SAME: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_tpws); - -int se_dev_set_emulate_caw(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.emulate_caw = flag; - pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n", - dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_caw); - -int se_dev_set_emulate_3pc(struct se_device *dev, int flag) -{ - if (flag != 0 && flag != 1) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.emulate_3pc = flag; - pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n", - dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_3pc); - -int se_dev_set_pi_prot_type(struct se_device *dev, int flag) -{ - int rc, old_prot = dev->dev_attrib.pi_prot_type; - - if (flag != 0 && flag != 1 && flag != 2 && flag != 3) { - pr_err("Illegal value %d for pi_prot_type\n", flag); - return -EINVAL; - } - if (flag == 2) { - pr_err("DIF TYPE2 protection currently not supported\n"); - return -ENOSYS; - } - if (dev->dev_attrib.hw_pi_prot_type) { - pr_warn("DIF protection enabled on underlying hardware," - " ignoring\n"); - return 0; - } - if (!dev->transport->init_prot || !dev->transport->free_prot) { - /* 0 is only allowed value for non-supporting backends */ - if (flag == 0) - return 0; - - pr_err("DIF protection not supported by backend: %s\n", - dev->transport->name); - return -ENOSYS; - } - if (!(dev->dev_flags & DF_CONFIGURED)) { - pr_err("DIF protection requires device to be configured\n"); - return -ENODEV; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device PROT type while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - dev->dev_attrib.pi_prot_type = flag; - - if (flag && !old_prot) { - rc = dev->transport->init_prot(dev); - if (rc) { - dev->dev_attrib.pi_prot_type = old_prot; - return rc; - } - - } else if (!flag && old_prot) { - dev->transport->free_prot(dev); - } - pr_debug("dev[%p]: SE Device Protection Type: %d\n", dev, flag); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_pi_prot_type); - -int se_dev_set_pi_prot_format(struct se_device *dev, int flag) -{ - int rc; - - if (!flag) - return 0; - - if (flag != 1) { - pr_err("Illegal value %d for pi_prot_format\n", flag); - return -EINVAL; - } - if (!dev->transport->format_prot) { - pr_err("DIF protection format not supported by backend %s\n", - dev->transport->name); - return -ENOSYS; - } - if (!(dev->dev_flags & DF_CONFIGURED)) { - pr_err("DIF protection format requires device to be configured\n"); - return -ENODEV; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to format SE Device PROT type while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - rc = dev->transport->format_prot(dev); - if (rc) - return rc; - - pr_debug("dev[%p]: SE Device Protection Format complete\n", dev); - - return 0; -} -EXPORT_SYMBOL(se_dev_set_pi_prot_format); - -int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - pr_err("Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.enforce_pr_isids = flag; - pr_debug("dev[%p]: SE Device enforce_pr_isids bit: %s\n", dev, - (dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled"); - return 0; -} -EXPORT_SYMBOL(se_dev_set_enforce_pr_isids); - -int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - printk(KERN_ERR "Illegal value %d\n", flag); - return -EINVAL; - } - if (dev->export_count) { - pr_err("dev[%p]: Unable to set force_pr_aptpl while" - " export_count is %d\n", dev, dev->export_count); - return -EINVAL; - } - - dev->dev_attrib.force_pr_aptpl = flag; - pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_force_pr_aptpl); - -int se_dev_set_is_nonrot(struct se_device *dev, int flag) -{ - if ((flag != 0) && (flag != 1)) { - printk(KERN_ERR "Illegal value %d\n", flag); - return -EINVAL; - } - dev->dev_attrib.is_nonrot = flag; - pr_debug("dev[%p]: SE Device is_nonrot bit: %d\n", - dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_is_nonrot); - -int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag) -{ - if (flag != 0) { - printk(KERN_ERR "dev[%p]: SE Device emulatation of restricted" - " reordering not implemented\n", dev); - return -ENOSYS; - } - dev->dev_attrib.emulate_rest_reord = flag; - pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag); - return 0; -} -EXPORT_SYMBOL(se_dev_set_emulate_rest_reord); - -/* - * Note, this can only be called on unexported SE Device Object. - */ -int se_dev_set_queue_depth(struct se_device *dev, u32 queue_depth) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device TCQ while" - " export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - if (!queue_depth) { - pr_err("dev[%p]: Illegal ZERO value for queue" - "_depth\n", dev); - return -EINVAL; - } - - if (queue_depth > dev->dev_attrib.queue_depth) { - if (queue_depth > dev->dev_attrib.hw_queue_depth) { - pr_err("dev[%p]: Passed queue_depth:" - " %u exceeds TCM/SE_Device MAX" - " TCQ: %u\n", dev, queue_depth, - dev->dev_attrib.hw_queue_depth); - return -EINVAL; - } - } - dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth; - pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n", - dev, queue_depth); - return 0; -} -EXPORT_SYMBOL(se_dev_set_queue_depth); - -int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device" - " optimal_sectors while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - if (optimal_sectors > dev->dev_attrib.hw_max_sectors) { - pr_err("dev[%p]: Passed optimal_sectors %u cannot be" - " greater than hw_max_sectors: %u\n", dev, - optimal_sectors, dev->dev_attrib.hw_max_sectors); - return -EINVAL; - } - - dev->dev_attrib.optimal_sectors = optimal_sectors; - pr_debug("dev[%p]: SE Device optimal_sectors changed to %u\n", - dev, optimal_sectors); - return 0; -} -EXPORT_SYMBOL(se_dev_set_optimal_sectors); - -int se_dev_set_block_size(struct se_device *dev, u32 block_size) -{ - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device block_size" - " while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - - if ((block_size != 512) && - (block_size != 1024) && - (block_size != 2048) && - (block_size != 4096)) { - pr_err("dev[%p]: Illegal value for block_device: %u" - " for SE device, must be 512, 1024, 2048 or 4096\n", - dev, block_size); - return -EINVAL; - } - - dev->dev_attrib.block_size = block_size; - pr_debug("dev[%p]: SE Device block_size changed to %u\n", - dev, block_size); - - if (dev->dev_attrib.max_bytes_per_io) - dev->dev_attrib.hw_max_sectors = - dev->dev_attrib.max_bytes_per_io / block_size; - - return 0; -} -EXPORT_SYMBOL(se_dev_set_block_size); - -struct se_lun *core_dev_add_lun( +int core_dev_add_lun( struct se_portal_group *tpg, struct se_device *dev, - u32 unpacked_lun) + struct se_lun *lun) { - struct se_lun *lun; int rc; - lun = core_tpg_alloc_lun(tpg, unpacked_lun); - if (IS_ERR(lun)) - return lun; - rc = core_tpg_add_lun(tpg, lun, TRANSPORT_LUNFLAGS_READ_WRITE, dev); if (rc < 0) - return ERR_PTR(rc); + return rc; - pr_debug("%s_TPG[%u]_LUN[%u] - Activated %s Logical Unit from" + pr_debug("%s_TPG[%u]_LUN[%llu] - Activated %s Logical Unit from" " CORE HBA: %u\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, tpg->se_tpg_tfo->get_fabric_name(), dev->se_hba->hba_id); @@ -1210,20 +566,19 @@ struct se_lun *core_dev_add_lun( */ if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) { struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + + mutex_lock(&tpg->acl_node_mutex); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (acl->dynamic_node_acl && (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { - spin_unlock_irq(&tpg->acl_node_lock); - core_tpg_add_node_to_devs(acl, tpg); - spin_lock_irq(&tpg->acl_node_lock); + core_tpg_add_node_to_devs(acl, tpg, lun); } } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); } - return lun; + return 0; } /* core_dev_del_lun(): @@ -1234,7 +589,7 @@ void core_dev_del_lun( struct se_portal_group *tpg, struct se_lun *lun) { - pr_debug("%s_TPG[%u]_LUN[%u] - Deactivating %s Logical Unit from" + pr_debug("%s_TPG[%u]_LUN[%llu] - Deactivating %s Logical Unit from" " device object\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, tpg->se_tpg_tfo->get_fabric_name()); @@ -1242,72 +597,10 @@ void core_dev_del_lun( core_tpg_remove_lun(tpg, lun); } -struct se_lun *core_get_lun_from_tpg(struct se_portal_group *tpg, u32 unpacked_lun) -{ - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS" - "_PER_TPG-1: %u for Target Portal Group: %hu\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - lun = tpg->tpg_lun_list[unpacked_lun]; - - if (lun->lun_status != TRANSPORT_LUN_STATUS_FREE) { - pr_err("%s Logical Unit Number: %u is not free on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - spin_unlock(&tpg->tpg_lun_lock); - - return lun; -} - -/* core_dev_get_lun(): - * - * - */ -static struct se_lun *core_dev_get_lun(struct se_portal_group *tpg, u32 unpacked_lun) -{ - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER" - "_TPG-1: %u for Target Portal Group: %hu\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - lun = tpg->tpg_lun_list[unpacked_lun]; - - if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) { - pr_err("%s Logical Unit Number: %u is not active on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return NULL; - } - spin_unlock(&tpg->tpg_lun_lock); - - return lun; -} - struct se_lun_acl *core_dev_init_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_node_acl *nacl, - u32 mapped_lun, + u64 mapped_lun, int *ret) { struct se_lun_acl *lacl; @@ -1325,7 +618,6 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( return NULL; } - INIT_LIST_HEAD(&lacl->lacl_list); lacl->mapped_lun = mapped_lun; lacl->se_lun_nacl = nacl; snprintf(lacl->initiatorname, TRANSPORT_IQN_LEN, "%s", @@ -1337,22 +629,16 @@ struct se_lun_acl *core_dev_init_initiator_node_lun_acl( int core_dev_add_initiator_node_lun_acl( struct se_portal_group *tpg, struct se_lun_acl *lacl, - u32 unpacked_lun, + struct se_lun *lun, u32 lun_access) { - struct se_lun *lun; - struct se_node_acl *nacl; - - lun = core_dev_get_lun(tpg, unpacked_lun); - if (!lun) { - pr_err("%s Logical Unit Number: %u is not active on" - " Target Portal Group: %hu, ignoring request.\n", - tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - return -EINVAL; - } + struct se_node_acl *nacl = lacl->se_lun_nacl; + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); - nacl = lacl->se_lun_nacl; if (!nacl) return -EINVAL; @@ -1366,52 +652,40 @@ int core_dev_add_initiator_node_lun_acl( lun_access, nacl, tpg) < 0) return -EINVAL; - spin_lock(&lun->lun_acl_lock); - list_add_tail(&lacl->lacl_list, &lun->lun_acl_list); - atomic_inc_mb(&lun->lun_acl_count); - spin_unlock(&lun->lun_acl_lock); - - pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for " + pr_debug("%s_TPG[%hu]_LUN[%llu->%llu] - Added %s ACL for " " InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), unpacked_lun, lacl->mapped_lun, + tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun, (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO", lacl->initiatorname); /* * Check to see if there are any existing persistent reservation APTPL * pre-registrations that need to be enabled for this LUN ACL.. */ - core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl, + core_scsi3_check_aptpl_registration(dev, tpg, lun, nacl, lacl->mapped_lun); return 0; } -/* core_dev_del_initiator_node_lun_acl(): - * - * - */ int core_dev_del_initiator_node_lun_acl( - struct se_portal_group *tpg, struct se_lun *lun, struct se_lun_acl *lacl) { + struct se_portal_group *tpg = lun->lun_tpg; struct se_node_acl *nacl; + struct se_dev_entry *deve; nacl = lacl->se_lun_nacl; if (!nacl) return -EINVAL; - spin_lock(&lun->lun_acl_lock); - list_del(&lacl->lacl_list); - atomic_dec_mb(&lun->lun_acl_count); - spin_unlock(&lun->lun_acl_lock); - - core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); - - lacl->se_lun = NULL; + mutex_lock(&nacl->lun_entry_mutex); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (deve) + core_disable_device_list_for_node(lun, deve, nacl, tpg); + mutex_unlock(&nacl->lun_entry_mutex); - pr_debug("%s_TPG[%hu]_LUN[%u] - Removed ACL for" - " InitiatorNode: %s Mapped LUN: %u\n", + pr_debug("%s_TPG[%hu]_LUN[%llu] - Removed ACL for" + " InitiatorNode: %s Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->initiatorname, lacl->mapped_lun); @@ -1424,7 +698,7 @@ void core_dev_free_initiator_node_lun_acl( struct se_lun_acl *lacl) { pr_debug("%s_TPG[%hu] - Freeing ACL for %s InitiatorNode: %s" - " Mapped LUN: %u\n", tpg->se_tpg_tfo->get_fabric_name(), + " Mapped LUN: %llu\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), tpg->se_tpg_tfo->get_fabric_name(), lacl->initiatorname, lacl->mapped_lun); @@ -1473,14 +747,15 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) struct se_device *dev; struct se_lun *xcopy_lun; - dev = hba->transport->alloc_device(hba, name); + dev = hba->backend->ops->alloc_device(hba, name); if (!dev) return NULL; dev->dev_link_magic = SE_DEV_LINK_MAGIC; dev->se_hba = hba; - dev->transport = hba->transport; + dev->transport = hba->backend->ops; dev->prot_length = sizeof(struct se_dif_v1_tuple); + dev->hba_index = hba->hba_index; INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_sep_list); @@ -1513,9 +788,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.da_dev = dev; dev->dev_attrib.emulate_model_alias = DA_EMULATE_MODEL_ALIAS; - dev->dev_attrib.emulate_dpo = DA_EMULATE_DPO; - dev->dev_attrib.emulate_fua_write = DA_EMULATE_FUA_WRITE; - dev->dev_attrib.emulate_fua_read = DA_EMULATE_FUA_READ; + dev->dev_attrib.emulate_dpo = 1; + dev->dev_attrib.emulate_fua_write = 1; + dev->dev_attrib.emulate_fua_read = 1; dev->dev_attrib.emulate_write_cache = DA_EMULATE_WRITE_CACHE; dev->dev_attrib.emulate_ua_intlck_ctrl = DA_EMULATE_UA_INTLLCK_CTRL; dev->dev_attrib.emulate_tas = DA_EMULATE_TAS; @@ -1537,12 +812,12 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; xcopy_lun = &dev->xcopy_lun; - xcopy_lun->lun_se_dev = dev; - init_completion(&xcopy_lun->lun_shutdown_comp); - INIT_LIST_HEAD(&xcopy_lun->lun_acl_list); - spin_lock_init(&xcopy_lun->lun_acl_lock); - spin_lock_init(&xcopy_lun->lun_sep_lock); + rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_ref_comp); + INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); + INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); + mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); + xcopy_lun->lun_tpg = &xcopy_pt_tpg; return dev; } @@ -1679,7 +954,7 @@ int core_dev_setup_virtual_lun0(void) goto out_free_hba; } - hba->transport->set_configfs_dev_params(dev, buf, sizeof(buf)); + hba->backend->ops->set_configfs_dev_params(dev, buf, sizeof(buf)); ret = target_configure_device(dev); if (ret) diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 1f7886bb16bf..48a36989c1a6 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -36,7 +36,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "target_core_internal.h" @@ -46,27 +45,25 @@ #define TF_CIT_SETUP(_name, _item_ops, _group_ops, _attrs) \ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \ { \ - struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \ - struct config_item_type *cit = &tfc->tfc_##_name##_cit; \ + struct config_item_type *cit = &tf->tf_##_name##_cit; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = _attrs; \ - cit->ct_owner = tf->tf_module; \ + cit->ct_owner = tf->tf_ops->module; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } #define TF_CIT_SETUP_DRV(_name, _item_ops, _group_ops) \ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \ { \ - struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \ - struct config_item_type *cit = &tfc->tfc_##_name##_cit; \ - struct configfs_attribute **attrs = tf->tf_ops.tfc_##_name##_attrs; \ + struct config_item_type *cit = &tf->tf_##_name##_cit; \ + struct configfs_attribute **attrs = tf->tf_ops->tfc_##_name##_attrs; \ \ cit->ct_item_ops = _item_ops; \ cit->ct_group_ops = _group_ops; \ cit->ct_attrs = attrs; \ - cit->ct_owner = tf->tf_module; \ + cit->ct_owner = tf->tf_ops->module; \ pr_debug("Setup generic %s\n", __stringify(_name)); \ } @@ -83,7 +80,7 @@ static int target_fabric_mappedlun_link( struct se_lun_acl, se_lun_group); struct se_portal_group *se_tpg; struct config_item *nacl_ci, *tpg_ci, *tpg_ci_s, *wwn_ci, *wwn_ci_s; - int ret = 0, lun_access; + int lun_access; if (lun->lun_link_magic != SE_LUN_LINK_MAGIC) { pr_err("Bad lun->lun_link_magic, not a valid lun_ci pointer:" @@ -93,12 +90,11 @@ static int target_fabric_mappedlun_link( /* * Ensure that the source port exists */ - if (!lun->lun_sep || !lun->lun_sep->sep_tpg) { - pr_err("Source se_lun->lun_sep or lun->lun_sep->sep" - "_tpg does not exist\n"); + if (!lun->lun_se_dev) { + pr_err("Source se_lun->lun_se_dev does not exist\n"); return -EINVAL; } - se_tpg = lun->lun_sep->sep_tpg; + se_tpg = lun->lun_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; tpg_ci = &nacl_ci->ci_group->cg_item; @@ -125,49 +121,35 @@ static int target_fabric_mappedlun_link( * which be will write protected (READ-ONLY) when * tpg_1/attrib/demo_mode_write_protect=1 */ - spin_lock_irq(&lacl->se_lun_nacl->device_list_lock); - deve = lacl->se_lun_nacl->device_list[lacl->mapped_lun]; - if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) + rcu_read_lock(); + deve = target_nacl_find_deve(lacl->se_lun_nacl, lacl->mapped_lun); + if (deve) lun_access = deve->lun_flags; else lun_access = (se_tpg->se_tpg_tfo->tpg_check_prod_mode_write_protect( se_tpg)) ? TRANSPORT_LUNFLAGS_READ_ONLY : TRANSPORT_LUNFLAGS_READ_WRITE; - spin_unlock_irq(&lacl->se_lun_nacl->device_list_lock); + rcu_read_unlock(); /* * Determine the actual mapped LUN value user wants.. * * This value is what the SCSI Initiator actually sees the - * iscsi/$IQN/$TPGT/lun/lun_* as on their SCSI Initiator Ports. + * $FABRIC/$WWPN/$TPGT/lun/lun_* as on their SCSI Initiator Ports. */ - ret = core_dev_add_initiator_node_lun_acl(se_tpg, lacl, - lun->unpacked_lun, lun_access); - - return (ret < 0) ? -EINVAL : 0; + return core_dev_add_initiator_node_lun_acl(se_tpg, lacl, lun, lun_access); } static int target_fabric_mappedlun_unlink( struct config_item *lun_acl_ci, struct config_item *lun_ci) { - struct se_lun *lun; struct se_lun_acl *lacl = container_of(to_config_group(lun_acl_ci), struct se_lun_acl, se_lun_group); - struct se_node_acl *nacl = lacl->se_lun_nacl; - struct se_dev_entry *deve = nacl->device_list[lacl->mapped_lun]; - struct se_portal_group *se_tpg; - /* - * Determine if the underlying MappedLUN has already been released.. - */ - if (!deve->se_lun) - return 0; - - lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - se_tpg = lun->lun_sep->sep_tpg; + struct se_lun *lun = container_of(to_config_group(lun_ci), + struct se_lun, lun_group); - core_dev_del_initiator_node_lun_acl(se_tpg, lun, lacl); - return 0; + return core_dev_del_initiator_node_lun_acl(lun, lacl); } CONFIGFS_EATTR_STRUCT(target_fabric_mappedlun, se_lun_acl); @@ -183,14 +165,15 @@ static ssize_t target_fabric_mappedlun_show_write_protect( { struct se_node_acl *se_nacl = lacl->se_lun_nacl; struct se_dev_entry *deve; - ssize_t len; + ssize_t len = 0; - spin_lock_irq(&se_nacl->device_list_lock); - deve = se_nacl->device_list[lacl->mapped_lun]; - len = sprintf(page, "%d\n", - (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ? - 1 : 0); - spin_unlock_irq(&se_nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(se_nacl, lacl->mapped_lun); + if (deve) { + len = sprintf(page, "%d\n", + (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ? 1 : 0); + } + rcu_read_unlock(); return len; } @@ -218,7 +201,7 @@ static ssize_t target_fabric_mappedlun_store_write_protect( lacl->se_lun_nacl); pr_debug("%s_ConfigFS: Changed Initiator ACL: %s" - " Mapped LUN: %u Write Protect bit to %s\n", + " Mapped LUN: %llu Write Protect bit to %s\n", se_tpg->se_tpg_tfo->get_fabric_name(), lacl->initiatorname, lacl->mapped_lun, (op) ? "ON" : "OFF"); @@ -338,7 +321,7 @@ static struct config_group *target_fabric_make_mappedlun( struct config_item *acl_ci; struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL; char *buf; - unsigned long mapped_lun; + unsigned long long mapped_lun; int ret = 0; acl_ci = &group->cg_item; @@ -366,21 +349,9 @@ static struct config_group *target_fabric_make_mappedlun( * Determine the Mapped LUN value. This is what the SCSI Initiator * Port will actually see. */ - ret = kstrtoul(buf + 4, 0, &mapped_lun); + ret = kstrtoull(buf + 4, 0, &mapped_lun); if (ret) goto out; - if (mapped_lun > UINT_MAX) { - ret = -EINVAL; - goto out; - } - if (mapped_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("Mapped LUN: %lu exceeds TRANSPORT_MAX_LUNS_PER_TPG" - "-1: %u for Target Portal Group: %u\n", mapped_lun, - TRANSPORT_MAX_LUNS_PER_TPG-1, - se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); - ret = -EINVAL; - goto out; - } lacl = core_dev_init_initiator_node_lun_acl(se_tpg, se_nacl, mapped_lun, &ret); @@ -399,9 +370,9 @@ static struct config_group *target_fabric_make_mappedlun( } config_group_init_type_name(&lacl->se_lun_group, name, - &tf->tf_cit_tmpl.tfc_tpg_mappedlun_cit); + &tf->tf_tpg_mappedlun_cit); config_group_init_type_name(&lacl->ml_stat_grps.stat_group, - "statistics", &tf->tf_cit_tmpl.tfc_tpg_mappedlun_stat_cit); + "statistics", &tf->tf_tpg_mappedlun_stat_cit); lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group; lacl_cg->default_groups[1] = NULL; @@ -458,10 +429,11 @@ static void target_fabric_nacl_base_release(struct config_item *item) { struct se_node_acl *se_nacl = container_of(to_config_group(item), struct se_node_acl, acl_group); - struct se_portal_group *se_tpg = se_nacl->se_tpg; - struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; + struct target_fabric_configfs *tf = se_nacl->se_tpg->se_tpg_wwn->wwn_tf; - tf->tf_ops.fabric_drop_nodeacl(se_nacl); + if (tf->tf_ops->fabric_cleanup_nodeacl) + tf->tf_ops->fabric_cleanup_nodeacl(se_nacl); + core_tpg_del_initiator_node_acl(se_nacl); } static struct configfs_item_operations target_fabric_nacl_base_item_ops = { @@ -501,15 +473,18 @@ static struct config_group *target_fabric_make_nodeacl( struct se_node_acl *se_nacl; struct config_group *nacl_cg; - if (!tf->tf_ops.fabric_make_nodeacl) { - pr_err("tf->tf_ops.fabric_make_nodeacl is NULL\n"); - return ERR_PTR(-ENOSYS); - } - - se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name); + se_nacl = core_tpg_add_initiator_node_acl(se_tpg, name); if (IS_ERR(se_nacl)) return ERR_CAST(se_nacl); + if (tf->tf_ops->fabric_init_nodeacl) { + int ret = tf->tf_ops->fabric_init_nodeacl(se_nacl, name); + if (ret) { + core_tpg_del_initiator_node_acl(se_nacl); + return ERR_PTR(ret); + } + } + nacl_cg = &se_nacl->acl_group; nacl_cg->default_groups = se_nacl->acl_default_groups; nacl_cg->default_groups[0] = &se_nacl->acl_attrib_group; @@ -519,16 +494,15 @@ static struct config_group *target_fabric_make_nodeacl( nacl_cg->default_groups[4] = NULL; config_group_init_type_name(&se_nacl->acl_group, name, - &tf->tf_cit_tmpl.tfc_tpg_nacl_base_cit); + &tf->tf_tpg_nacl_base_cit); config_group_init_type_name(&se_nacl->acl_attrib_group, "attrib", - &tf->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit); + &tf->tf_tpg_nacl_attrib_cit); config_group_init_type_name(&se_nacl->acl_auth_group, "auth", - &tf->tf_cit_tmpl.tfc_tpg_nacl_auth_cit); + &tf->tf_tpg_nacl_auth_cit); config_group_init_type_name(&se_nacl->acl_param_group, "param", - &tf->tf_cit_tmpl.tfc_tpg_nacl_param_cit); + &tf->tf_tpg_nacl_param_cit); config_group_init_type_name(&se_nacl->acl_fabric_stat_group, - "fabric_statistics", - &tf->tf_cit_tmpl.tfc_tpg_nacl_stat_cit); + "fabric_statistics", &tf->tf_tpg_nacl_stat_cit); return &se_nacl->acl_group; } @@ -575,7 +549,7 @@ static void target_fabric_np_base_release(struct config_item *item) struct se_portal_group *se_tpg = se_tpg_np->tpg_np_parent; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; - tf->tf_ops.fabric_drop_np(se_tpg_np); + tf->tf_ops->fabric_drop_np(se_tpg_np); } static struct configfs_item_operations target_fabric_np_base_item_ops = { @@ -599,18 +573,18 @@ static struct config_group *target_fabric_make_np( struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct se_tpg_np *se_tpg_np; - if (!tf->tf_ops.fabric_make_np) { + if (!tf->tf_ops->fabric_make_np) { pr_err("tf->tf_ops.fabric_make_np is NULL\n"); return ERR_PTR(-ENOSYS); } - se_tpg_np = tf->tf_ops.fabric_make_np(se_tpg, group, name); + se_tpg_np = tf->tf_ops->fabric_make_np(se_tpg, group, name); if (!se_tpg_np || IS_ERR(se_tpg_np)) return ERR_PTR(-EINVAL); se_tpg_np->tpg_np_parent = se_tpg; config_group_init_type_name(&se_tpg_np->tpg_np_group, name, - &tf->tf_cit_tmpl.tfc_tpg_np_base_cit); + &tf->tf_tpg_np_base_cit); return &se_tpg_np->tpg_np_group; } @@ -654,10 +628,10 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_gp( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; - return core_alua_show_tg_pt_gp_info(lun->lun_sep, page); + return core_alua_show_tg_pt_gp_info(lun, page); } static ssize_t target_fabric_port_store_attr_alua_tg_pt_gp( @@ -665,10 +639,10 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_gp( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; - return core_alua_store_tg_pt_gp_info(lun->lun_sep, page, count); + return core_alua_store_tg_pt_gp_info(lun, page, count); } TCM_PORT_ATTR(alua_tg_pt_gp, S_IRUGO | S_IWUSR); @@ -680,7 +654,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_offline( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_offline_bit(lun, page); @@ -691,7 +665,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_offline( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_offline_bit(lun, page, count); @@ -706,7 +680,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_status( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_secondary_status(lun, page); @@ -717,7 +691,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_status( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_secondary_status(lun, page, count); @@ -732,7 +706,7 @@ static ssize_t target_fabric_port_show_attr_alua_tg_pt_write_md( struct se_lun *lun, char *page) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_show_secondary_write_metadata(lun, page); @@ -743,7 +717,7 @@ static ssize_t target_fabric_port_store_attr_alua_tg_pt_write_md( const char *page, size_t count) { - if (!lun || !lun->lun_sep) + if (!lun || !lun->lun_se_dev) return -ENODEV; return core_alua_store_secondary_write_metadata(lun, page, count); @@ -769,7 +743,6 @@ static int target_fabric_port_link( struct config_item *tpg_ci; struct se_lun *lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - struct se_lun *lun_p; struct se_portal_group *se_tpg; struct se_device *dev = container_of(to_config_group(se_dev_ci), struct se_device, dev_group); @@ -797,20 +770,19 @@ static int target_fabric_port_link( return -EEXIST; } - lun_p = core_dev_add_lun(se_tpg, dev, lun->unpacked_lun); - if (IS_ERR(lun_p)) { - pr_err("core_dev_add_lun() failed\n"); - ret = PTR_ERR(lun_p); + ret = core_dev_add_lun(se_tpg, dev, lun); + if (ret) { + pr_err("core_dev_add_lun() failed: %d\n", ret); goto out; } - if (tf->tf_ops.fabric_post_link) { + if (tf->tf_ops->fabric_post_link) { /* * Call the optional fabric_post_link() to allow a * fabric module to setup any additional state once * core_dev_add_lun() has been called.. */ - tf->tf_ops.fabric_post_link(se_tpg, lun); + tf->tf_ops->fabric_post_link(se_tpg, lun); } return 0; @@ -824,25 +796,34 @@ static int target_fabric_port_unlink( { struct se_lun *lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group); - struct se_portal_group *se_tpg = lun->lun_sep->sep_tpg; + struct se_portal_group *se_tpg = lun->lun_tpg; struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; - if (tf->tf_ops.fabric_pre_unlink) { + if (tf->tf_ops->fabric_pre_unlink) { /* * Call the optional fabric_pre_unlink() to allow a * fabric module to release any additional stat before * core_dev_del_lun() is called. */ - tf->tf_ops.fabric_pre_unlink(se_tpg, lun); + tf->tf_ops->fabric_pre_unlink(se_tpg, lun); } core_dev_del_lun(se_tpg, lun); return 0; } +static void target_fabric_port_release(struct config_item *item) +{ + struct se_lun *lun = container_of(to_config_group(item), + struct se_lun, lun_group); + + kfree_rcu(lun, rcu_head); +} + static struct configfs_item_operations target_fabric_port_item_ops = { .show_attribute = target_fabric_port_attr_show, .store_attribute = target_fabric_port_attr_store, + .release = target_fabric_port_release, .allow_link = target_fabric_port_link, .drop_link = target_fabric_port_unlink, }; @@ -887,7 +868,7 @@ static struct config_group *target_fabric_make_lun( struct se_portal_group, tpg_lun_group); struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; struct config_group *lun_cg = NULL, *port_stat_grp = NULL; - unsigned long unpacked_lun; + unsigned long long unpacked_lun; int errno; if (strstr(name, "lun_") != name) { @@ -895,28 +876,27 @@ static struct config_group *target_fabric_make_lun( " \"lun_$LUN_NUMBER\"\n"); return ERR_PTR(-EINVAL); } - errno = kstrtoul(name + 4, 0, &unpacked_lun); + errno = kstrtoull(name + 4, 0, &unpacked_lun); if (errno) return ERR_PTR(errno); - if (unpacked_lun > UINT_MAX) - return ERR_PTR(-EINVAL); - lun = core_get_lun_from_tpg(se_tpg, unpacked_lun); - if (!lun) - return ERR_PTR(-EINVAL); + lun = core_tpg_alloc_lun(se_tpg, unpacked_lun); + if (IS_ERR(lun)) + return ERR_CAST(lun); lun_cg = &lun->lun_group; lun_cg->default_groups = kmalloc(sizeof(struct config_group *) * 2, GFP_KERNEL); if (!lun_cg->default_groups) { pr_err("Unable to allocate lun_cg->default_groups\n"); + kfree(lun); return ERR_PTR(-ENOMEM); } config_group_init_type_name(&lun->lun_group, name, - &tf->tf_cit_tmpl.tfc_tpg_port_cit); + &tf->tf_tpg_port_cit); config_group_init_type_name(&lun->port_stat_grps.stat_group, - "statistics", &tf->tf_cit_tmpl.tfc_tpg_port_stat_cit); + "statistics", &tf->tf_tpg_port_stat_cit); lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group; lun_cg->default_groups[1] = NULL; @@ -926,6 +906,7 @@ static struct config_group *target_fabric_make_lun( if (!port_stat_grp->default_groups) { pr_err("Unable to allocate port_stat_grp->default_groups\n"); kfree(lun_cg->default_groups); + kfree(lun); return ERR_PTR(-ENOMEM); } target_stat_setup_port_default_groups(lun); @@ -1023,7 +1004,7 @@ static void target_fabric_tpg_release(struct config_item *item) struct se_wwn *wwn = se_tpg->se_tpg_wwn; struct target_fabric_configfs *tf = wwn->wwn_tf; - tf->tf_ops.fabric_drop_tpg(se_tpg); + tf->tf_ops->fabric_drop_tpg(se_tpg); } static struct configfs_item_operations target_fabric_tpg_base_item_ops = { @@ -1046,12 +1027,12 @@ static struct config_group *target_fabric_make_tpg( struct target_fabric_configfs *tf = wwn->wwn_tf; struct se_portal_group *se_tpg; - if (!tf->tf_ops.fabric_make_tpg) { - pr_err("tf->tf_ops.fabric_make_tpg is NULL\n"); + if (!tf->tf_ops->fabric_make_tpg) { + pr_err("tf->tf_ops->fabric_make_tpg is NULL\n"); return ERR_PTR(-ENOSYS); } - se_tpg = tf->tf_ops.fabric_make_tpg(wwn, group, name); + se_tpg = tf->tf_ops->fabric_make_tpg(wwn, group, name); if (!se_tpg || IS_ERR(se_tpg)) return ERR_PTR(-EINVAL); /* @@ -1067,19 +1048,19 @@ static struct config_group *target_fabric_make_tpg( se_tpg->tpg_group.default_groups[6] = NULL; config_group_init_type_name(&se_tpg->tpg_group, name, - &tf->tf_cit_tmpl.tfc_tpg_base_cit); + &tf->tf_tpg_base_cit); config_group_init_type_name(&se_tpg->tpg_lun_group, "lun", - &tf->tf_cit_tmpl.tfc_tpg_lun_cit); + &tf->tf_tpg_lun_cit); config_group_init_type_name(&se_tpg->tpg_np_group, "np", - &tf->tf_cit_tmpl.tfc_tpg_np_cit); + &tf->tf_tpg_np_cit); config_group_init_type_name(&se_tpg->tpg_acl_group, "acls", - &tf->tf_cit_tmpl.tfc_tpg_nacl_cit); + &tf->tf_tpg_nacl_cit); config_group_init_type_name(&se_tpg->tpg_attrib_group, "attrib", - &tf->tf_cit_tmpl.tfc_tpg_attrib_cit); + &tf->tf_tpg_attrib_cit); config_group_init_type_name(&se_tpg->tpg_auth_group, "auth", - &tf->tf_cit_tmpl.tfc_tpg_auth_cit); + &tf->tf_tpg_auth_cit); config_group_init_type_name(&se_tpg->tpg_param_group, "param", - &tf->tf_cit_tmpl.tfc_tpg_param_cit); + &tf->tf_tpg_param_cit); return &se_tpg->tpg_group; } @@ -1112,7 +1093,7 @@ static void target_fabric_release_wwn(struct config_item *item) struct se_wwn, wwn_group); struct target_fabric_configfs *tf = wwn->wwn_tf; - tf->tf_ops.fabric_drop_wwn(wwn); + tf->tf_ops->fabric_drop_wwn(wwn); } static struct configfs_item_operations target_fabric_tpg_item_ops = { @@ -1148,12 +1129,12 @@ static struct config_group *target_fabric_make_wwn( struct target_fabric_configfs, tf_group); struct se_wwn *wwn; - if (!tf->tf_ops.fabric_make_wwn) { + if (!tf->tf_ops->fabric_make_wwn) { pr_err("tf->tf_ops.fabric_make_wwn is NULL\n"); return ERR_PTR(-ENOSYS); } - wwn = tf->tf_ops.fabric_make_wwn(tf, group, name); + wwn = tf->tf_ops->fabric_make_wwn(tf, group, name); if (!wwn || IS_ERR(wwn)) return ERR_PTR(-EINVAL); @@ -1165,10 +1146,9 @@ static struct config_group *target_fabric_make_wwn( wwn->wwn_group.default_groups[0] = &wwn->fabric_stat_group; wwn->wwn_group.default_groups[1] = NULL; - config_group_init_type_name(&wwn->wwn_group, name, - &tf->tf_cit_tmpl.tfc_tpg_cit); + config_group_init_type_name(&wwn->wwn_group, name, &tf->tf_tpg_cit); config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics", - &tf->tf_cit_tmpl.tfc_wwn_fabric_stats_cit); + &tf->tf_wwn_fabric_stats_cit); return &wwn->wwn_group; } diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 41f4f270f919..cb6497ce4b61 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -24,6 +24,11 @@ * ******************************************************************************/ +/* + * See SPC4, section 7.5 "Protocol specific parameters" for details + * on the formats implemented in this file. + */ + #include <linux/kernel.h> #include <linux/string.h> #include <linux/ctype.h> @@ -34,124 +39,30 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_pr.h" -/* - * Handlers for Serial Attached SCSI (SAS) - */ -u8 sas_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - /* - * Return a SAS Serial SCSI Protocol identifier for loopback operations - * This is defined in section 7.5.1 Table 362 in spc4r17 - */ - return 0x6; -} -EXPORT_SYMBOL(sas_get_fabric_proto_ident); -u32 sas_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, +static int sas_get_pr_transport_id( + struct se_node_acl *nacl, int *format_code, unsigned char *buf) { - unsigned char *ptr; int ret; - /* - * Set PROTOCOL IDENTIFIER to 6h for SAS - */ - buf[0] = 0x06; - /* - * From spc4r17, 7.5.4.7 TransportID for initiator ports using SCSI - * over SAS Serial SCSI Protocol - */ - ptr = &se_nacl->initiatorname[4]; /* Skip over 'naa. prefix */ - - ret = hex2bin(&buf[4], ptr, 8); - if (ret < 0) - pr_debug("sas transport_id: invalid hex string\n"); - - /* - * The SAS Transport ID is a hardcoded 24-byte length - */ - return 24; -} -EXPORT_SYMBOL(sas_get_pr_transport_id); - -u32 sas_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * From spc4r17, 7.5.4.7 TransportID for initiator ports using SCSI - * over SAS Serial SCSI Protocol - * - * The SAS Transport ID is a hardcoded 24-byte length - */ - return 24; -} -EXPORT_SYMBOL(sas_get_pr_transport_id_len); - -/* - * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above - * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations. - */ -char *sas_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - /* - * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID - * for initiator ports using SCSI over SAS Serial SCSI Protocol - * - * The TransportID for a SAS Initiator Port is of fixed size of - * 24 bytes, and SAS does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; - - return (char *)&buf[4]; -} -EXPORT_SYMBOL(sas_parse_pr_out_transport_id); - -/* - * Handlers for Fibre Channel Protocol (FCP) - */ -u8 fc_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - return 0x0; /* 0 = fcp-2 per SPC4 section 7.5.1 */ -} -EXPORT_SYMBOL(fc_get_fabric_proto_ident); + /* Skip over 'naa. prefix */ + ret = hex2bin(&buf[4], &nacl->initiatorname[4], 8); + if (ret) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } -u32 fc_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - *format_code = 0; - /* - * The FC Transport ID is a hardcoded 24-byte length - */ return 24; } -EXPORT_SYMBOL(fc_get_pr_transport_id_len); -u32 fc_get_pr_transport_id( - struct se_portal_group *se_tpg, +static int fc_get_pr_transport_id( struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf) { @@ -160,24 +71,20 @@ u32 fc_get_pr_transport_id( u32 off = 8; /* - * PROTOCOL IDENTIFIER is 0h for FCP-2 - * - * From spc4r17, 7.5.4.2 TransportID for initiator ports using - * SCSI over Fibre Channel - * * We convert the ASCII formatted N Port name into a binary * encoded TransportID. */ ptr = &se_nacl->initiatorname[0]; - for (i = 0; i < 24; ) { if (!strncmp(&ptr[i], ":", 1)) { i++; continue; } ret = hex2bin(&buf[off++], &ptr[i], 1); - if (ret < 0) - pr_debug("fc transport_id: invalid hex string\n"); + if (ret < 0) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } i += 2; } /* @@ -185,42 +92,52 @@ u32 fc_get_pr_transport_id( */ return 24; } -EXPORT_SYMBOL(fc_get_pr_transport_id); -char *fc_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) +static int sbp_get_pr_transport_id( + struct se_node_acl *nacl, + int *format_code, + unsigned char *buf) { - /* - * The TransportID for a FC N Port is of fixed size of - * 24 bytes, and FC does not contain a I_T nexus identifier, - * so we return the **port_nexus_ptr set to NULL. - */ - *port_nexus_ptr = NULL; - *out_tid_len = 24; + int ret; - return (char *)&buf[8]; -} -EXPORT_SYMBOL(fc_parse_pr_out_transport_id); + ret = hex2bin(&buf[8], nacl->initiatorname, 8); + if (ret) { + pr_debug("%s: invalid hex string\n", __func__); + return ret; + } -/* - * Handlers for Internet Small Computer Systems Interface (iSCSI) - */ + return 24; +} -u8 iscsi_get_fabric_proto_ident(struct se_portal_group *se_tpg) +static int srp_get_pr_transport_id( + struct se_node_acl *nacl, + int *format_code, + unsigned char *buf) { - /* - * This value is defined for "Internet SCSI (iSCSI)" - * in spc4r17 section 7.5.1 Table 362 - */ - return 0x5; + const char *p; + unsigned len, count, leading_zero_bytes; + int rc; + + p = nacl->initiatorname; + if (strncasecmp(p, "0x", 2) == 0) + p += 2; + len = strlen(p); + if (len % 2) + return -EINVAL; + + count = min(len / 2, 16U); + leading_zero_bytes = 16 - count; + memset(buf + 8, 0, leading_zero_bytes); + rc = hex2bin(buf + 8 + leading_zero_bytes, p, count); + if (rc < 0) { + pr_debug("hex2bin failed for %s: %d\n", __func__, rc); + return rc; + } + + return 24; } -EXPORT_SYMBOL(iscsi_get_fabric_proto_ident); -u32 iscsi_get_pr_transport_id( - struct se_portal_group *se_tpg, +static int iscsi_get_pr_transport_id( struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, int *format_code, @@ -231,10 +148,6 @@ u32 iscsi_get_pr_transport_id( spin_lock_irq(&se_nacl->nacl_sess_lock); /* - * Set PROTOCOL IDENTIFIER to 5h for iSCSI - */ - buf[0] = 0x05; - /* * From spc4r17 Section 7.5.4.6: TransportID for initiator * ports using SCSI over iSCSI. * @@ -313,10 +226,8 @@ u32 iscsi_get_pr_transport_id( return len; } -EXPORT_SYMBOL(iscsi_get_pr_transport_id); -u32 iscsi_get_pr_transport_id_len( - struct se_portal_group *se_tpg, +static int iscsi_get_pr_transport_id_len( struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, int *format_code) @@ -359,9 +270,8 @@ u32 iscsi_get_pr_transport_id_len( return len; } -EXPORT_SYMBOL(iscsi_get_pr_transport_id_len); -char *iscsi_parse_pr_out_transport_id( +static char *iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, const char *buf, u32 *out_tid_len, @@ -448,4 +358,79 @@ char *iscsi_parse_pr_out_transport_id( return (char *)&buf[4]; } -EXPORT_SYMBOL(iscsi_parse_pr_out_transport_id); + +int target_get_pr_transport_id_len(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code) +{ + switch (nacl->se_tpg->proto_id) { + case SCSI_PROTOCOL_FCP: + case SCSI_PROTOCOL_SBP: + case SCSI_PROTOCOL_SRP: + case SCSI_PROTOCOL_SAS: + break; + case SCSI_PROTOCOL_ISCSI: + return iscsi_get_pr_transport_id_len(nacl, pr_reg, format_code); + default: + pr_err("Unknown proto_id: 0x%02x\n", nacl->se_tpg->proto_id); + return -EINVAL; + } + + /* + * Most transports use a fixed length 24 byte identifier. + */ + *format_code = 0; + return 24; +} + +int target_get_pr_transport_id(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code, + unsigned char *buf) +{ + switch (nacl->se_tpg->proto_id) { + case SCSI_PROTOCOL_SAS: + return sas_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_SBP: + return sbp_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_SRP: + return srp_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_FCP: + return fc_get_pr_transport_id(nacl, format_code, buf); + case SCSI_PROTOCOL_ISCSI: + return iscsi_get_pr_transport_id(nacl, pr_reg, format_code, + buf); + default: + pr_err("Unknown proto_id: 0x%02x\n", nacl->se_tpg->proto_id); + return -EINVAL; + } +} + +const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, + const char *buf, u32 *out_tid_len, char **port_nexus_ptr) +{ + u32 offset; + + switch (tpg->proto_id) { + case SCSI_PROTOCOL_SAS: + /* + * Assume the FORMAT CODE 00b from spc4r17, 7.5.4.7 TransportID + * for initiator ports using SCSI over SAS Serial SCSI Protocol. + */ + offset = 4; + break; + case SCSI_PROTOCOL_SBP: + case SCSI_PROTOCOL_SRP: + case SCSI_PROTOCOL_FCP: + offset = 8; + break; + case SCSI_PROTOCOL_ISCSI: + return iscsi_parse_pr_out_transport_id(tpg, buf, out_tid_len, + port_nexus_ptr); + default: + pr_err("Unknown proto_id: 0x%02x\n", tpg->proto_id); + return NULL; + } + + *port_nexus_ptr = NULL; + *out_tid_len = 24; + return buf + offset; +} diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 664171353289..e3195700211a 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -37,7 +37,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> -#include <target/target_core_backend_configfs.h> #include "target_core_file.h" @@ -46,10 +45,6 @@ static inline struct fd_dev *FD_DEV(struct se_device *dev) return container_of(dev, struct fd_dev, dev); } -/* fd_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int fd_attach_hba(struct se_hba *hba, u32 host_id) { struct fd_host *fd_host; @@ -66,7 +61,7 @@ static int fd_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM FILEIO HBA Driver %s on Generic" " Target Core Stack %s\n", hba->hba_id, FD_VERSION, - TARGET_CORE_MOD_VERSION); + TARGET_CORE_VERSION); pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic\n", hba->hba_id, fd_host->fd_host_id); @@ -246,87 +241,34 @@ fail: return ret; } -static void fd_free_device(struct se_device *dev) +static void fd_dev_call_rcu(struct rcu_head *p) { + struct se_device *dev = container_of(p, struct se_device, rcu_head); struct fd_dev *fd_dev = FD_DEV(dev); - if (fd_dev->fd_file) { - filp_close(fd_dev->fd_file, NULL); - fd_dev->fd_file = NULL; - } - kfree(fd_dev); } -static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot, - int is_write) +static void fd_free_device(struct se_device *dev) { - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *dev = FD_DEV(se_dev); - struct file *prot_fd = dev->fd_prot_file; - loff_t pos = (cmd->t_task_lba * se_dev->prot_length); - unsigned char *buf; - u32 prot_size; - int rc, ret = 1; - - prot_size = (cmd->data_length / se_dev->dev_attrib.block_size) * - se_dev->prot_length; - - if (!is_write) { - fd_prot->prot_buf = kzalloc(prot_size, GFP_KERNEL); - if (!fd_prot->prot_buf) { - pr_err("Unable to allocate fd_prot->prot_buf\n"); - return -ENOMEM; - } - buf = fd_prot->prot_buf; - - fd_prot->prot_sg_nents = 1; - fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist), - GFP_KERNEL); - if (!fd_prot->prot_sg) { - pr_err("Unable to allocate fd_prot->prot_sg\n"); - kfree(fd_prot->prot_buf); - return -ENOMEM; - } - sg_init_table(fd_prot->prot_sg, fd_prot->prot_sg_nents); - sg_set_buf(fd_prot->prot_sg, buf, prot_size); - } - - if (is_write) { - rc = kernel_write(prot_fd, fd_prot->prot_buf, prot_size, pos); - if (rc < 0 || prot_size != rc) { - pr_err("kernel_write() for fd_do_prot_rw failed:" - " %d\n", rc); - ret = -EINVAL; - } - } else { - rc = kernel_read(prot_fd, pos, fd_prot->prot_buf, prot_size); - if (rc < 0) { - pr_err("kernel_read() for fd_do_prot_rw failed:" - " %d\n", rc); - ret = -EINVAL; - } - } + struct fd_dev *fd_dev = FD_DEV(dev); - if (is_write || ret < 0) { - kfree(fd_prot->prot_sg); - kfree(fd_prot->prot_buf); + if (fd_dev->fd_file) { + filp_close(fd_dev->fd_file, NULL); + fd_dev->fd_file = NULL; } - - return ret; + call_rcu(&dev->rcu_head, fd_dev_call_rcu); } -static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, - u32 sgl_nents, int is_write) +static int fd_do_rw(struct se_cmd *cmd, struct file *fd, + u32 block_size, struct scatterlist *sgl, + u32 sgl_nents, u32 data_length, int is_write) { - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *dev = FD_DEV(se_dev); - struct file *fd = dev->fd_file; struct scatterlist *sg; struct iov_iter iter; struct bio_vec *bvec; ssize_t len = 0; - loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size); + loff_t pos = (cmd->t_task_lba * block_size); int ret = 0, i; bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL); @@ -352,7 +294,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, kfree(bvec); if (is_write) { - if (ret < 0 || ret != cmd->data_length) { + if (ret < 0 || ret != data_length) { pr_err("%s() write returned %d\n", __func__, ret); return (ret < 0 ? ret : -EINVAL); } @@ -363,10 +305,10 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, * block_device. */ if (S_ISBLK(file_inode(fd)->i_mode)) { - if (ret < 0 || ret != cmd->data_length) { + if (ret < 0 || ret != data_length) { pr_err("%s() returned %d, expecting %u for " "S_ISBLK\n", __func__, ret, - cmd->data_length); + data_length); return (ret < 0 ? ret : -EINVAL); } } else { @@ -533,9 +475,9 @@ fd_do_prot_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) } static sense_reason_t -fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb) +fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { - struct file *file = priv; + struct file *file = FD_DEV(cmd->se_dev)->fd_file; struct inode *inode = file->f_mapping->host; int ret; @@ -577,42 +519,13 @@ fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb) } static sense_reason_t -fd_execute_write_same_unmap(struct se_cmd *cmd) -{ - struct se_device *se_dev = cmd->se_dev; - struct fd_dev *fd_dev = FD_DEV(se_dev); - struct file *file = fd_dev->fd_file; - sector_t lba = cmd->t_task_lba; - sector_t nolb = sbc_get_write_same_sectors(cmd); - sense_reason_t ret; - - if (!nolb) { - target_complete_cmd(cmd, SAM_STAT_GOOD); - return 0; - } - - ret = fd_do_unmap(cmd, file, lba, nolb); - if (ret) - return ret; - - target_complete_cmd(cmd, GOOD); - return 0; -} - -static sense_reason_t -fd_execute_unmap(struct se_cmd *cmd) -{ - struct file *file = FD_DEV(cmd->se_dev)->fd_file; - - return sbc_execute_unmap(cmd, fd_do_unmap, file); -} - -static sense_reason_t fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { struct se_device *dev = cmd->se_dev; - struct fd_prot fd_prot; + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *file = fd_dev->fd_file; + struct file *pfile = fd_dev->fd_prot_file; sense_reason_t rc; int ret = 0; /* @@ -630,58 +543,45 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * physical memory addresses to struct iovec virtual memory. */ if (data_direction == DMA_FROM_DEVICE) { - memset(&fd_prot, 0, sizeof(struct fd_prot)); - if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { - ret = fd_do_prot_rw(cmd, &fd_prot, false); + ret = fd_do_rw(cmd, pfile, dev->prot_length, + cmd->t_prot_sg, cmd->t_prot_nents, + cmd->prot_length, 0); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - ret = fd_do_rw(cmd, sgl, sgl_nents, 0); + ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size, + sgl, sgl_nents, cmd->data_length, 0); if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { - u32 sectors = cmd->data_length / dev->dev_attrib.block_size; + u32 sectors = cmd->data_length >> + ilog2(dev->dev_attrib.block_size); - rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, - 0, fd_prot.prot_sg, 0); - if (rc) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, + 0, cmd->t_prot_sg, 0); + if (rc) return rc; - } - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); } } else { - memset(&fd_prot, 0, sizeof(struct fd_prot)); - if (cmd->prot_type && dev->dev_attrib.pi_prot_type) { - u32 sectors = cmd->data_length / dev->dev_attrib.block_size; + u32 sectors = cmd->data_length >> + ilog2(dev->dev_attrib.block_size); - ret = fd_do_prot_rw(cmd, &fd_prot, false); - if (ret < 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - - rc = sbc_dif_verify_write(cmd, cmd->t_task_lba, sectors, - 0, fd_prot.prot_sg, 0); - if (rc) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, + 0, cmd->t_prot_sg, 0); + if (rc) return rc; - } } - ret = fd_do_rw(cmd, sgl, sgl_nents, 1); + ret = fd_do_rw(cmd, file, dev->dev_attrib.block_size, + sgl, sgl_nents, cmd->data_length, 1); /* * Perform implicit vfs_fsync_range() for fd_do_writev() ops * for SCSI WRITEs with Forced Unit Access (FUA) set. * Allow this to happen independent of WCE=0 setting. */ - if (ret > 0 && - dev->dev_attrib.emulate_fua_write > 0 && - (cmd->se_cmd_flags & SCF_FUA)) { - struct fd_dev *fd_dev = FD_DEV(dev); + if (ret > 0 && (cmd->se_cmd_flags & SCF_FUA)) { loff_t start = cmd->t_task_lba * dev->dev_attrib.block_size; loff_t end; @@ -695,17 +595,16 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) { - ret = fd_do_prot_rw(cmd, &fd_prot, true); + ret = fd_do_rw(cmd, pfile, dev->prot_length, + cmd->t_prot_sg, cmd->t_prot_nents, + cmd->prot_length, 1); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } } - if (ret < 0) { - kfree(fd_prot.prot_sg); - kfree(fd_prot.prot_buf); + if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } if (ret) target_complete_cmd(cmd, SAM_STAT_GOOD); @@ -908,7 +807,6 @@ static struct sbc_ops fd_sbc_ops = { .execute_rw = fd_execute_rw, .execute_sync_cache = fd_execute_sync_cache, .execute_write_same = fd_execute_write_same, - .execute_write_same_unmap = fd_execute_write_same_unmap, .execute_unmap = fd_execute_unmap, }; @@ -918,42 +816,7 @@ fd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &fd_sbc_ops); } -DEF_TB_DEFAULT_ATTRIBS(fileio); - -static struct configfs_attribute *fileio_backend_dev_attrs[] = { - &fileio_dev_attrib_emulate_model_alias.attr, - &fileio_dev_attrib_emulate_dpo.attr, - &fileio_dev_attrib_emulate_fua_write.attr, - &fileio_dev_attrib_emulate_fua_read.attr, - &fileio_dev_attrib_emulate_write_cache.attr, - &fileio_dev_attrib_emulate_ua_intlck_ctrl.attr, - &fileio_dev_attrib_emulate_tas.attr, - &fileio_dev_attrib_emulate_tpu.attr, - &fileio_dev_attrib_emulate_tpws.attr, - &fileio_dev_attrib_emulate_caw.attr, - &fileio_dev_attrib_emulate_3pc.attr, - &fileio_dev_attrib_pi_prot_type.attr, - &fileio_dev_attrib_hw_pi_prot_type.attr, - &fileio_dev_attrib_pi_prot_format.attr, - &fileio_dev_attrib_enforce_pr_isids.attr, - &fileio_dev_attrib_is_nonrot.attr, - &fileio_dev_attrib_emulate_rest_reord.attr, - &fileio_dev_attrib_force_pr_aptpl.attr, - &fileio_dev_attrib_hw_block_size.attr, - &fileio_dev_attrib_block_size.attr, - &fileio_dev_attrib_hw_max_sectors.attr, - &fileio_dev_attrib_optimal_sectors.attr, - &fileio_dev_attrib_hw_queue_depth.attr, - &fileio_dev_attrib_queue_depth.attr, - &fileio_dev_attrib_max_unmap_lba_count.attr, - &fileio_dev_attrib_max_unmap_block_desc_count.attr, - &fileio_dev_attrib_unmap_granularity.attr, - &fileio_dev_attrib_unmap_granularity_alignment.attr, - &fileio_dev_attrib_max_write_same_len.attr, - NULL, -}; - -static struct se_subsystem_api fileio_template = { +static const struct target_backend_ops fileio_ops = { .name = "fileio", .inquiry_prod = "FILEIO", .inquiry_rev = FD_VERSION, @@ -971,21 +834,17 @@ static struct se_subsystem_api fileio_template = { .init_prot = fd_init_prot, .format_prot = fd_format_prot, .free_prot = fd_free_prot, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; static int __init fileio_module_init(void) { - struct target_backend_cits *tbc = &fileio_template.tb_cits; - - target_core_setup_sub_cits(&fileio_template); - tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs; - - return transport_subsystem_register(&fileio_template); + return transport_backend_register(&fileio_ops); } static void __exit fileio_module_exit(void) { - transport_subsystem_release(&fileio_template); + target_backend_unregister(&fileio_ops); } MODULE_DESCRIPTION("TCM FILEIO subsystem plugin"); diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index 182cbb295039..068966fce308 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -21,12 +21,6 @@ #define FDBD_HAS_BUFFERED_IO_WCE 0x04 #define FDBD_FORMAT_UNIT_SIZE 2048 -struct fd_prot { - unsigned char *prot_buf; - struct scatterlist *prot_sg; - u32 prot_sg_nents; -}; - struct fd_dev { struct se_device dev; diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index ff95f95dcd13..62ea4e8e70a8 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -36,67 +36,78 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" -static LIST_HEAD(subsystem_list); -static DEFINE_MUTEX(subsystem_mutex); +static LIST_HEAD(backend_list); +static DEFINE_MUTEX(backend_mutex); static u32 hba_id_counter; static DEFINE_SPINLOCK(hba_lock); static LIST_HEAD(hba_list); -int transport_subsystem_register(struct se_subsystem_api *sub_api) -{ - struct se_subsystem_api *s; - - INIT_LIST_HEAD(&sub_api->sub_api_list); - mutex_lock(&subsystem_mutex); - list_for_each_entry(s, &subsystem_list, sub_api_list) { - if (!strcmp(s->name, sub_api->name)) { - pr_err("%p is already registered with" - " duplicate name %s, unable to process" - " request\n", s, s->name); - mutex_unlock(&subsystem_mutex); +int transport_backend_register(const struct target_backend_ops *ops) +{ + struct target_backend *tb, *old; + + tb = kzalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->ops = ops; + + mutex_lock(&backend_mutex); + list_for_each_entry(old, &backend_list, list) { + if (!strcmp(old->ops->name, ops->name)) { + pr_err("backend %s already registered.\n", ops->name); + mutex_unlock(&backend_mutex); + kfree(tb); return -EEXIST; } } - list_add_tail(&sub_api->sub_api_list, &subsystem_list); - mutex_unlock(&subsystem_mutex); + target_setup_backend_cits(tb); + list_add_tail(&tb->list, &backend_list); + mutex_unlock(&backend_mutex); - pr_debug("TCM: Registered subsystem plugin: %s struct module:" - " %p\n", sub_api->name, sub_api->owner); + pr_debug("TCM: Registered subsystem plugin: %s struct module: %p\n", + ops->name, ops->owner); return 0; } -EXPORT_SYMBOL(transport_subsystem_register); +EXPORT_SYMBOL(transport_backend_register); -void transport_subsystem_release(struct se_subsystem_api *sub_api) +void target_backend_unregister(const struct target_backend_ops *ops) { - mutex_lock(&subsystem_mutex); - list_del(&sub_api->sub_api_list); - mutex_unlock(&subsystem_mutex); + struct target_backend *tb; + + mutex_lock(&backend_mutex); + list_for_each_entry(tb, &backend_list, list) { + if (tb->ops == ops) { + list_del(&tb->list); + kfree(tb); + break; + } + } + mutex_unlock(&backend_mutex); } -EXPORT_SYMBOL(transport_subsystem_release); +EXPORT_SYMBOL(target_backend_unregister); -static struct se_subsystem_api *core_get_backend(const char *sub_name) +static struct target_backend *core_get_backend(const char *name) { - struct se_subsystem_api *s; + struct target_backend *tb; - mutex_lock(&subsystem_mutex); - list_for_each_entry(s, &subsystem_list, sub_api_list) { - if (!strcmp(s->name, sub_name)) + mutex_lock(&backend_mutex); + list_for_each_entry(tb, &backend_list, list) { + if (!strcmp(tb->ops->name, name)) goto found; } - mutex_unlock(&subsystem_mutex); + mutex_unlock(&backend_mutex); return NULL; found: - if (s->owner && !try_module_get(s->owner)) - s = NULL; - mutex_unlock(&subsystem_mutex); - return s; + if (tb->ops->owner && !try_module_get(tb->ops->owner)) + tb = NULL; + mutex_unlock(&backend_mutex); + return tb; } struct se_hba * @@ -117,13 +128,13 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags) hba->hba_index = scsi_get_new_index(SCSI_INST_INDEX); hba->hba_flags |= hba_flags; - hba->transport = core_get_backend(plugin_name); - if (!hba->transport) { + hba->backend = core_get_backend(plugin_name); + if (!hba->backend) { ret = -EINVAL; goto out_free_hba; } - ret = hba->transport->attach_hba(hba, plugin_dep_id); + ret = hba->backend->ops->attach_hba(hba, plugin_dep_id); if (ret < 0) goto out_module_put; @@ -138,8 +149,8 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags) return hba; out_module_put: - module_put(hba->transport->owner); - hba->transport = NULL; + module_put(hba->backend->ops->owner); + hba->backend = NULL; out_free_hba: kfree(hba); return ERR_PTR(ret); @@ -150,7 +161,7 @@ core_delete_hba(struct se_hba *hba) { WARN_ON(hba->dev_count); - hba->transport->detach_hba(hba); + hba->backend->ops->detach_hba(hba); spin_lock(&hba_lock); list_del(&hba->hba_node); @@ -159,9 +170,9 @@ core_delete_hba(struct se_hba *hba) pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target" " Core\n", hba->hba_id); - module_put(hba->transport->owner); + module_put(hba->backend->ops->owner); - hba->transport = NULL; + hba->backend = NULL; kfree(hba); return 0; } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 972ed1781ae2..6d88d24e6cce 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -40,7 +40,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> -#include <target/target_core_backend_configfs.h> #include "target_core_iblock.h" @@ -53,17 +52,11 @@ static inline struct iblock_dev *IBLOCK_DEV(struct se_device *dev) } -static struct se_subsystem_api iblock_template; - -/* iblock_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int iblock_attach_hba(struct se_hba *hba, u32 host_id) { pr_debug("CORE_HBA[%d] - TCM iBlock HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - IBLOCK_VERSION, TARGET_CORE_MOD_VERSION); + IBLOCK_VERSION, TARGET_CORE_VERSION); return 0; } @@ -197,6 +190,14 @@ out: return ret; } +static void iblock_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + + kfree(ib_dev); +} + static void iblock_free_device(struct se_device *dev) { struct iblock_dev *ib_dev = IBLOCK_DEV(dev); @@ -206,7 +207,7 @@ static void iblock_free_device(struct se_device *dev) if (ib_dev->ibd_bio_set != NULL) bioset_free(ib_dev->ibd_bio_set); - kfree(ib_dev); + call_rcu(&dev->rcu_head, iblock_dev_call_rcu); } static unsigned long long iblock_emulate_read_cap_with_block_size( @@ -414,10 +415,9 @@ iblock_execute_sync_cache(struct se_cmd *cmd) } static sense_reason_t -iblock_do_unmap(struct se_cmd *cmd, void *priv, - sector_t lba, sector_t nolb) +iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { - struct block_device *bdev = priv; + struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; int ret; ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0); @@ -430,30 +430,6 @@ iblock_do_unmap(struct se_cmd *cmd, void *priv, } static sense_reason_t -iblock_execute_unmap(struct se_cmd *cmd) -{ - struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; - - return sbc_execute_unmap(cmd, iblock_do_unmap, bdev); -} - -static sense_reason_t -iblock_execute_write_same_unmap(struct se_cmd *cmd) -{ - struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; - sector_t lba = cmd->t_task_lba; - sector_t nolb = sbc_get_write_same_sectors(cmd); - sense_reason_t ret; - - ret = iblock_do_unmap(cmd, bdev, lba, nolb); - if (ret) - return ret; - - target_complete_cmd(cmd, GOOD); - return 0; -} - -static sense_reason_t iblock_execute_write_same(struct se_cmd *cmd) { struct iblock_req *ibr; @@ -844,7 +820,6 @@ static struct sbc_ops iblock_sbc_ops = { .execute_rw = iblock_execute_rw, .execute_sync_cache = iblock_execute_sync_cache, .execute_write_same = iblock_execute_write_same, - .execute_write_same_unmap = iblock_execute_write_same_unmap, .execute_unmap = iblock_execute_unmap, }; @@ -863,42 +838,7 @@ static bool iblock_get_write_cache(struct se_device *dev) return q->flush_flags & REQ_FLUSH; } -DEF_TB_DEFAULT_ATTRIBS(iblock); - -static struct configfs_attribute *iblock_backend_dev_attrs[] = { - &iblock_dev_attrib_emulate_model_alias.attr, - &iblock_dev_attrib_emulate_dpo.attr, - &iblock_dev_attrib_emulate_fua_write.attr, - &iblock_dev_attrib_emulate_fua_read.attr, - &iblock_dev_attrib_emulate_write_cache.attr, - &iblock_dev_attrib_emulate_ua_intlck_ctrl.attr, - &iblock_dev_attrib_emulate_tas.attr, - &iblock_dev_attrib_emulate_tpu.attr, - &iblock_dev_attrib_emulate_tpws.attr, - &iblock_dev_attrib_emulate_caw.attr, - &iblock_dev_attrib_emulate_3pc.attr, - &iblock_dev_attrib_pi_prot_type.attr, - &iblock_dev_attrib_hw_pi_prot_type.attr, - &iblock_dev_attrib_pi_prot_format.attr, - &iblock_dev_attrib_enforce_pr_isids.attr, - &iblock_dev_attrib_is_nonrot.attr, - &iblock_dev_attrib_emulate_rest_reord.attr, - &iblock_dev_attrib_force_pr_aptpl.attr, - &iblock_dev_attrib_hw_block_size.attr, - &iblock_dev_attrib_block_size.attr, - &iblock_dev_attrib_hw_max_sectors.attr, - &iblock_dev_attrib_optimal_sectors.attr, - &iblock_dev_attrib_hw_queue_depth.attr, - &iblock_dev_attrib_queue_depth.attr, - &iblock_dev_attrib_max_unmap_lba_count.attr, - &iblock_dev_attrib_max_unmap_block_desc_count.attr, - &iblock_dev_attrib_unmap_granularity.attr, - &iblock_dev_attrib_unmap_granularity_alignment.attr, - &iblock_dev_attrib_max_write_same_len.attr, - NULL, -}; - -static struct se_subsystem_api iblock_template = { +static const struct target_backend_ops iblock_ops = { .name = "iblock", .inquiry_prod = "IBLOCK", .inquiry_rev = IBLOCK_VERSION, @@ -918,21 +858,17 @@ static struct se_subsystem_api iblock_template = { .get_io_min = iblock_get_io_min, .get_io_opt = iblock_get_io_opt, .get_write_cache = iblock_get_write_cache, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; static int __init iblock_module_init(void) { - struct target_backend_cits *tbc = &iblock_template.tb_cits; - - target_core_setup_sub_cits(&iblock_template); - tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs; - - return transport_subsystem_register(&iblock_template); + return transport_backend_register(&iblock_ops); } static void __exit iblock_module_exit(void) { - transport_subsystem_release(&iblock_template); + target_backend_unregister(&iblock_ops); } MODULE_DESCRIPTION("TCM IBLOCK subsystem plugin"); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 68bd7f5d9f73..99c24acfe676 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -1,6 +1,53 @@ #ifndef TARGET_CORE_INTERNAL_H #define TARGET_CORE_INTERNAL_H +#define TARGET_CORE_NAME_MAX_LEN 64 +#define TARGET_FABRIC_NAME_SIZE 32 + +struct target_backend { + struct list_head list; + + const struct target_backend_ops *ops; + + struct config_item_type tb_dev_cit; + struct config_item_type tb_dev_attrib_cit; + struct config_item_type tb_dev_pr_cit; + struct config_item_type tb_dev_wwn_cit; + struct config_item_type tb_dev_alua_tg_pt_gps_cit; + struct config_item_type tb_dev_stat_cit; +}; + +struct target_fabric_configfs { + atomic_t tf_access_cnt; + struct list_head tf_list; + struct config_group tf_group; + struct config_group tf_disc_group; + struct config_group *tf_default_groups[2]; + const struct target_core_fabric_ops *tf_ops; + + struct config_item_type tf_discovery_cit; + struct config_item_type tf_wwn_cit; + struct config_item_type tf_wwn_fabric_stats_cit; + struct config_item_type tf_tpg_cit; + struct config_item_type tf_tpg_base_cit; + struct config_item_type tf_tpg_lun_cit; + struct config_item_type tf_tpg_port_cit; + struct config_item_type tf_tpg_port_stat_cit; + struct config_item_type tf_tpg_np_cit; + struct config_item_type tf_tpg_np_base_cit; + struct config_item_type tf_tpg_attrib_cit; + struct config_item_type tf_tpg_auth_cit; + struct config_item_type tf_tpg_param_cit; + struct config_item_type tf_tpg_nacl_cit; + struct config_item_type tf_tpg_nacl_base_cit; + struct config_item_type tf_tpg_nacl_attrib_cit; + struct config_item_type tf_tpg_nacl_auth_cit; + struct config_item_type tf_tpg_nacl_param_cit; + struct config_item_type tf_tpg_nacl_stat_cit; + struct config_item_type tf_tpg_mappedlun_cit; + struct config_item_type tf_tpg_mappedlun_stat_cit; +}; + /* target_core_alua.c */ extern struct t10_alua_lu_gp *default_lu_gp; @@ -8,28 +55,27 @@ extern struct t10_alua_lu_gp *default_lu_gp; extern struct mutex g_device_mutex; extern struct list_head g_device_list; +int core_alloc_rtpi(struct se_lun *lun, struct se_device *dev); struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16); -int core_free_device_list_for_node(struct se_node_acl *, +void target_pr_kref_release(struct kref *); +void core_free_device_list_for_node(struct se_node_acl *, struct se_portal_group *); -void core_update_device_list_access(u32, u32, struct se_node_acl *); +void core_update_device_list_access(u64, u32, struct se_node_acl *); +struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u64); int core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *, - u32, u32, struct se_node_acl *, struct se_portal_group *); -int core_disable_device_list_for_node(struct se_lun *, struct se_lun_acl *, - u32, u32, struct se_node_acl *, struct se_portal_group *); + u64, u32, struct se_node_acl *, struct se_portal_group *); +void core_disable_device_list_for_node(struct se_lun *, struct se_dev_entry *, + struct se_node_acl *, struct se_portal_group *); void core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *); -int core_dev_export(struct se_device *, struct se_portal_group *, - struct se_lun *); -void core_dev_unexport(struct se_device *, struct se_portal_group *, - struct se_lun *); -struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32); +int core_dev_add_lun(struct se_portal_group *, struct se_device *, + struct se_lun *lun); void core_dev_del_lun(struct se_portal_group *, struct se_lun *); -struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32); struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *, - struct se_node_acl *, u32, int *); + struct se_node_acl *, u64, int *); int core_dev_add_initiator_node_lun_acl(struct se_portal_group *, - struct se_lun_acl *, u32, u32); -int core_dev_del_initiator_node_lun_acl(struct se_portal_group *, - struct se_lun *, struct se_lun_acl *); + struct se_lun_acl *, struct se_lun *lun, u32); +int core_dev_del_initiator_node_lun_acl(struct se_lun *, + struct se_lun_acl *); void core_dev_free_initiator_node_lun_acl(struct se_portal_group *, struct se_lun_acl *lacl); int core_dev_setup_virtual_lun0(void); @@ -38,6 +84,18 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name); int target_configure_device(struct se_device *dev); void target_free_device(struct se_device *); +/* target_core_configfs.c */ +void target_setup_backend_cits(struct target_backend *); + +/* target_core_fabric_lib.c */ +int target_get_pr_transport_id_len(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code); +int target_get_pr_transport_id(struct se_node_acl *nacl, + struct t10_pr_registration *pr_reg, int *format_code, + unsigned char *buf); +const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, + const char *buf, u32 *out_tid_len, char **port_nexus_ptr); + /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); int core_delete_hba(struct se_hba *); @@ -53,12 +111,16 @@ extern struct se_device *g_lun0_dev; struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, const char *); -void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *); +void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *, + struct se_lun *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); -struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32); +struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64); int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, u32, struct se_device *); void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *); +struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *tpg, + const char *initiatorname); +void core_tpg_del_initiator_node_acl(struct se_node_acl *acl); /* target_core_transport.c */ extern struct kmem_cache *se_tmr_req_cache; @@ -77,14 +139,19 @@ int transport_dump_vpd_assoc(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident_type(struct t10_vpd *, unsigned char *, int); int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags); -int transport_clear_lun_ref(struct se_lun *); +void transport_clear_lun_ref(struct se_lun *); void transport_send_task_abort(struct se_cmd *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); +bool target_check_wce(struct se_device *dev); +bool target_check_fua(struct se_device *dev); /* target_core_stat.c */ void target_stat_setup_dev_default_groups(struct se_device *); void target_stat_setup_port_default_groups(struct se_lun *); void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *); +/* target_core_xcopy.c */ +extern struct se_portal_group xcopy_pt_tpg; + #endif /* TARGET_CORE_INTERNAL_H */ diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 8e5fa291f878..0fdbe43b7dad 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -35,7 +35,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_pr.h" @@ -45,7 +44,6 @@ * Used for Specify Initiator Ports Capable Bit (SPEC_I_PT) */ struct pr_transport_id_holder { - int dest_local_nexus; struct t10_pr_registration *dest_pr_reg; struct se_portal_group *dest_tpg; struct se_node_acl *dest_node_acl; @@ -231,9 +229,10 @@ target_scsi2_reservation_release(struct se_cmd *cmd) dev->dev_reservation_flags &= ~DRF_SPC2_RESERVATIONS_WITH_ISID; } tpg = sess->se_tpg; - pr_debug("SCSI-2 Released reservation for %s LUN: %u ->" - " MAPPED LUN: %u for %s\n", tpg->se_tpg_tfo->get_fabric_name(), - cmd->se_lun->unpacked_lun, cmd->se_deve->mapped_lun, + pr_debug("SCSI-2 Released reservation for %s LUN: %llu ->" + " MAPPED LUN: %llu for %s\n", + tpg->se_tpg_tfo->get_fabric_name(), + cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); out_unlock: @@ -277,12 +276,12 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) (dev->dev_reserved_node_acl != sess->se_node_acl)) { pr_err("SCSI-2 RESERVATION CONFLIFT for %s fabric\n", tpg->se_tpg_tfo->get_fabric_name()); - pr_err("Original reserver LUN: %u %s\n", + pr_err("Original reserver LUN: %llu %s\n", cmd->se_lun->unpacked_lun, dev->dev_reserved_node_acl->initiatorname); - pr_err("Current attempt - LUN: %u -> MAPPED LUN: %u" + pr_err("Current attempt - LUN: %llu -> MAPPED LUN: %llu" " from %s \n", cmd->se_lun->unpacked_lun, - cmd->se_deve->mapped_lun, + cmd->orig_fe_lun, sess->se_node_acl->initiatorname); ret = TCM_RESERVATION_CONFLICT; goto out_unlock; @@ -294,9 +293,9 @@ target_scsi2_reservation_reserve(struct se_cmd *cmd) dev->dev_res_bin_isid = sess->sess_bin_isid; dev->dev_reservation_flags |= DRF_SPC2_RESERVATIONS_WITH_ISID; } - pr_debug("SCSI-2 Reserved %s LUN: %u -> MAPPED LUN: %u" + pr_debug("SCSI-2 Reserved %s LUN: %llu -> MAPPED LUN: %llu" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), - cmd->se_lun->unpacked_lun, cmd->se_deve->mapped_lun, + cmd->se_lun->unpacked_lun, cmd->orig_fe_lun, sess->se_node_acl->initiatorname); out_unlock: @@ -314,28 +313,31 @@ out: * This function is called by those initiator ports who are *NOT* * the active PR reservation holder when a reservation is present. */ -static int core_scsi3_pr_seq_non_holder( - struct se_cmd *cmd, - u32 pr_reg_type) +static int core_scsi3_pr_seq_non_holder(struct se_cmd *cmd, u32 pr_reg_type, + bool isid_mismatch) { unsigned char *cdb = cmd->t_task_cdb; - struct se_dev_entry *se_deve; struct se_session *se_sess = cmd->se_sess; - int other_cdb = 0, ignore_reg; + struct se_node_acl *nacl = se_sess->se_node_acl; + int other_cdb = 0; int registered_nexus = 0, ret = 1; /* Conflict by default */ int all_reg = 0, reg_only = 0; /* ALL_REG, REG_ONLY */ int we = 0; /* Write Exclusive */ int legacy = 0; /* Act like a legacy device and return * RESERVATION CONFLICT on some CDBs */ - se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; - /* - * Determine if the registration should be ignored due to - * non-matching ISIDs in target_scsi3_pr_reservation_check(). - */ - ignore_reg = (pr_reg_type & 0x80000000); - if (ignore_reg) - pr_reg_type &= ~0x80000000; + if (isid_mismatch) { + registered_nexus = 0; + } else { + struct se_dev_entry *se_deve; + + rcu_read_lock(); + se_deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (se_deve) + registered_nexus = test_bit(DEF_PR_REG_ACTIVE, + &se_deve->deve_flags); + rcu_read_unlock(); + } switch (pr_reg_type) { case PR_TYPE_WRITE_EXCLUSIVE: @@ -345,8 +347,6 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for the persistent reservation * holder. */ - if ((se_deve->def_pr_registered) && !(ignore_reg)) - registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_REGONLY: we = 1; @@ -355,8 +355,6 @@ static int core_scsi3_pr_seq_non_holder( * Some commands are only allowed for registered I_T Nexuses. */ reg_only = 1; - if ((se_deve->def_pr_registered) && !(ignore_reg)) - registered_nexus = 1; break; case PR_TYPE_WRITE_EXCLUSIVE_ALLREG: we = 1; @@ -365,8 +363,6 @@ static int core_scsi3_pr_seq_non_holder( * Each registered I_T Nexus is a reservation holder. */ all_reg = 1; - if ((se_deve->def_pr_registered) && !(ignore_reg)) - registered_nexus = 1; break; default: return -EINVAL; @@ -572,6 +568,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; struct se_session *sess = cmd->se_sess; u32 pr_reg_type; + bool isid_mismatch = false; if (!dev->dev_pr_res_holder) return 0; @@ -584,7 +581,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) if (dev->dev_pr_res_holder->isid_present_at_reg) { if (dev->dev_pr_res_holder->pr_reg_bin_isid != sess->sess_bin_isid) { - pr_reg_type |= 0x80000000; + isid_mismatch = true; goto check_nonholder; } } @@ -592,7 +589,7 @@ target_scsi3_pr_reservation_check(struct se_cmd *cmd) return 0; check_nonholder: - if (core_scsi3_pr_seq_non_holder(cmd, pr_reg_type)) + if (core_scsi3_pr_seq_non_holder(cmd, pr_reg_type, isid_mismatch)) return TCM_RESERVATION_CONFLICT; return 0; } @@ -620,7 +617,9 @@ static u32 core_scsi3_pr_generation(struct se_device *dev) static struct t10_pr_registration *__core_scsi3_do_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -642,12 +641,12 @@ static struct t10_pr_registration *__core_scsi3_do_alloc_registration( atomic_set(&pr_reg->pr_res_holders, 0); pr_reg->pr_reg_nacl = nacl; pr_reg->pr_reg_deve = deve; - pr_reg->pr_res_mapped_lun = deve->mapped_lun; - pr_reg->pr_aptpl_target_lun = deve->se_lun->unpacked_lun; + pr_reg->pr_res_mapped_lun = mapped_lun; + pr_reg->pr_aptpl_target_lun = lun->unpacked_lun; + pr_reg->tg_pt_sep_rtpi = lun->lun_rtpi; pr_reg->pr_res_key = sa_res_key; pr_reg->pr_reg_all_tg_pt = all_tg_pt; pr_reg->pr_reg_aptpl = aptpl; - pr_reg->pr_reg_tg_pt_lun = deve->se_lun; /* * If an ISID value for this SCSI Initiator Port exists, * save it to the registration now. @@ -671,7 +670,9 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *); static struct t10_pr_registration *__core_scsi3_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -679,7 +680,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( { struct se_dev_entry *deve_tmp; struct se_node_acl *nacl_tmp; - struct se_port *port, *port_tmp; + struct se_lun_acl *lacl_tmp; + struct se_lun *lun_tmp, *next, *dest_lun; const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; struct t10_pr_registration *pr_reg, *pr_reg_atp, *pr_reg_tmp, *pr_reg_tmp_safe; int ret; @@ -687,8 +689,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * Create a registration for the I_T Nexus upon which the * PROUT REGISTER was received. */ - pr_reg = __core_scsi3_do_alloc_registration(dev, nacl, deve, isid, - sa_res_key, all_tg_pt, aptpl); + pr_reg = __core_scsi3_do_alloc_registration(dev, nacl, lun, deve, mapped_lun, + isid, sa_res_key, all_tg_pt, + aptpl); if (!pr_reg) return NULL; /* @@ -701,13 +704,13 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * for ALL_TG_PT=1 */ spin_lock(&dev->se_port_lock); - list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) { - atomic_inc_mb(&port->sep_tg_pt_ref_cnt); + list_for_each_entry_safe(lun_tmp, next, &dev->dev_sep_list, lun_dev_link) { + if (!percpu_ref_tryget_live(&lun_tmp->lun_ref)) + continue; spin_unlock(&dev->se_port_lock); - spin_lock_bh(&port->sep_alua_lock); - list_for_each_entry(deve_tmp, &port->sep_alua_list, - alua_port_list) { + spin_lock(&lun_tmp->lun_deve_lock); + list_for_each_entry(deve_tmp, &lun_tmp->lun_deve_list, lun_link) { /* * This pointer will be NULL for demo mode MappedLUNs * that have not been make explicit via a ConfigFS @@ -716,7 +719,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (!deve_tmp->se_lun_acl) continue; - nacl_tmp = deve_tmp->se_lun_acl->se_lun_nacl; + lacl_tmp = rcu_dereference_check(deve_tmp->se_lun_acl, + lockdep_is_held(&lun_tmp->lun_deve_lock)); + nacl_tmp = lacl_tmp->se_lun_nacl; /* * Skip the matching struct se_node_acl that is allocated * above.. @@ -736,8 +741,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (strcmp(nacl->initiatorname, nacl_tmp->initiatorname)) continue; - atomic_inc_mb(&deve_tmp->pr_ref_count); - spin_unlock_bh(&port->sep_alua_lock); + kref_get(&deve_tmp->pr_kref); + spin_unlock(&lun_tmp->lun_deve_lock); /* * Grab a configfs group dependency that is released * for the exception path at label out: below, or upon @@ -748,8 +753,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( if (ret < 0) { pr_err("core_scsi3_lunacl_depend" "_item() failed\n"); - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); - atomic_dec_mb(&deve_tmp->pr_ref_count); + percpu_ref_put(&lun_tmp->lun_ref); + kref_put(&deve_tmp->pr_kref, target_pr_kref_release); goto out; } /* @@ -759,24 +764,27 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration( * the original *pr_reg is processed in * __core_scsi3_add_registration() */ + dest_lun = rcu_dereference_check(deve_tmp->se_lun, + atomic_read(&deve_tmp->pr_kref.refcount) != 0); + pr_reg_atp = __core_scsi3_do_alloc_registration(dev, - nacl_tmp, deve_tmp, NULL, + nacl_tmp, dest_lun, deve_tmp, + deve_tmp->mapped_lun, NULL, sa_res_key, all_tg_pt, aptpl); if (!pr_reg_atp) { - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); - atomic_dec_mb(&deve_tmp->pr_ref_count); + percpu_ref_put(&lun_tmp->lun_ref); core_scsi3_lunacl_undepend_item(deve_tmp); goto out; } list_add_tail(&pr_reg_atp->pr_reg_atp_mem_list, &pr_reg->pr_reg_atp_list); - spin_lock_bh(&port->sep_alua_lock); + spin_lock(&lun_tmp->lun_deve_lock); } - spin_unlock_bh(&port->sep_alua_lock); + spin_unlock(&lun_tmp->lun_deve_lock); spin_lock(&dev->se_port_lock); - atomic_dec_mb(&port->sep_tg_pt_ref_cnt); + percpu_ref_put(&lun_tmp->lun_ref); } spin_unlock(&dev->se_port_lock); @@ -797,10 +805,10 @@ int core_scsi3_alloc_aptpl_registration( u64 sa_res_key, unsigned char *i_port, unsigned char *isid, - u32 mapped_lun, + u64 mapped_lun, unsigned char *t_port, u16 tpgt, - u32 target_lun, + u64 target_lun, int res_holder, int all_tg_pt, u8 type) @@ -831,7 +839,6 @@ int core_scsi3_alloc_aptpl_registration( pr_reg->pr_res_key = sa_res_key; pr_reg->pr_reg_all_tg_pt = all_tg_pt; pr_reg->pr_reg_aptpl = 1; - pr_reg->pr_reg_tg_pt_lun = NULL; pr_reg->pr_res_scope = 0; /* Always LUN_SCOPE */ pr_reg->pr_res_type = type; /* @@ -895,9 +902,9 @@ static int __core_scsi3_check_aptpl_registration( struct se_device *dev, struct se_portal_group *tpg, struct se_lun *lun, - u32 target_lun, + u64 target_lun, struct se_node_acl *nacl, - struct se_dev_entry *deve) + u64 mapped_lun) { struct t10_pr_registration *pr_reg, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; @@ -925,14 +932,13 @@ static int __core_scsi3_check_aptpl_registration( pr_reg_aptpl_list) { if (!strcmp(pr_reg->pr_iport, i_port) && - (pr_reg->pr_res_mapped_lun == deve->mapped_lun) && + (pr_reg->pr_res_mapped_lun == mapped_lun) && !(strcmp(pr_reg->pr_tport, t_port)) && (pr_reg->pr_reg_tpgt == tpgt) && (pr_reg->pr_aptpl_target_lun == target_lun)) { pr_reg->pr_reg_nacl = nacl; - pr_reg->pr_reg_deve = deve; - pr_reg->pr_reg_tg_pt_lun = lun; + pr_reg->tg_pt_sep_rtpi = lun->lun_rtpi; list_del(&pr_reg->pr_reg_aptpl_list); spin_unlock(&pr_tmpl->aptpl_reg_lock); @@ -967,15 +973,14 @@ int core_scsi3_check_aptpl_registration( struct se_portal_group *tpg, struct se_lun *lun, struct se_node_acl *nacl, - u32 mapped_lun) + u64 mapped_lun) { - struct se_dev_entry *deve = nacl->device_list[mapped_lun]; - if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; return __core_scsi3_check_aptpl_registration(dev, tpg, lun, - lun->unpacked_lun, nacl, deve); + lun->unpacked_lun, nacl, + mapped_lun); } static void __core_scsi3_dump_registration( @@ -1009,10 +1014,6 @@ static void __core_scsi3_dump_registration( pr_reg->pr_reg_aptpl); } -/* - * this function can be called with struct se_device->dev_reservation_lock - * when register_move = 1 - */ static void __core_scsi3_add_registration( struct se_device *dev, struct se_node_acl *nacl, @@ -1023,6 +1024,7 @@ static void __core_scsi3_add_registration( const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo; struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; struct t10_reservation *pr_tmpl = &dev->t10_pr; + struct se_dev_entry *deve; /* * Increment PRgeneration counter for struct se_device upon a successful @@ -1039,10 +1041,16 @@ static void __core_scsi3_add_registration( spin_lock(&pr_tmpl->registration_lock); list_add_tail(&pr_reg->pr_reg_list, &pr_tmpl->registration_list); - pr_reg->pr_reg_deve->def_pr_registered = 1; __core_scsi3_dump_registration(tfo, dev, nacl, pr_reg, register_type); spin_unlock(&pr_tmpl->registration_lock); + + rcu_read_lock(); + deve = pr_reg->pr_reg_deve; + if (deve) + set_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + /* * Skip extra processing for ALL_TG_PT=0 or REGISTER_AND_MOVE. */ @@ -1054,6 +1062,8 @@ static void __core_scsi3_add_registration( */ list_for_each_entry_safe(pr_reg_tmp, pr_reg_tmp_safe, &pr_reg->pr_reg_atp_list, pr_reg_atp_mem_list) { + struct se_node_acl *nacl_tmp = pr_reg_tmp->pr_reg_nacl; + list_del(&pr_reg_tmp->pr_reg_atp_mem_list); pr_reg_tmp->pr_res_generation = core_scsi3_pr_generation(dev); @@ -1061,12 +1071,17 @@ static void __core_scsi3_add_registration( spin_lock(&pr_tmpl->registration_lock); list_add_tail(&pr_reg_tmp->pr_reg_list, &pr_tmpl->registration_list); - pr_reg_tmp->pr_reg_deve->def_pr_registered = 1; - __core_scsi3_dump_registration(tfo, dev, - pr_reg_tmp->pr_reg_nacl, pr_reg_tmp, - register_type); + __core_scsi3_dump_registration(tfo, dev, nacl_tmp, pr_reg_tmp, + register_type); spin_unlock(&pr_tmpl->registration_lock); + + rcu_read_lock(); + deve = pr_reg_tmp->pr_reg_deve; + if (deve) + set_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + /* * Drop configfs group dependency reference from * __core_scsi3_alloc_registration() @@ -1078,7 +1093,9 @@ static void __core_scsi3_add_registration( static int core_scsi3_alloc_registration( struct se_device *dev, struct se_node_acl *nacl, + struct se_lun *lun, struct se_dev_entry *deve, + u64 mapped_lun, unsigned char *isid, u64 sa_res_key, int all_tg_pt, @@ -1088,8 +1105,9 @@ static int core_scsi3_alloc_registration( { struct t10_pr_registration *pr_reg; - pr_reg = __core_scsi3_alloc_registration(dev, nacl, deve, isid, - sa_res_key, all_tg_pt, aptpl); + pr_reg = __core_scsi3_alloc_registration(dev, nacl, lun, deve, mapped_lun, + isid, sa_res_key, all_tg_pt, + aptpl); if (!pr_reg) return -EPERM; @@ -1242,13 +1260,13 @@ static void __core_scsi3_free_registration( const struct target_core_fabric_ops *tfo = pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo; struct t10_reservation *pr_tmpl = &dev->t10_pr; + struct se_node_acl *nacl = pr_reg->pr_reg_nacl; + struct se_dev_entry *deve; char i_buf[PR_REG_ISID_ID_LEN]; memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); - pr_reg->pr_reg_deve->def_pr_registered = 0; - pr_reg->pr_reg_deve->pr_res_key = 0; if (!list_empty(&pr_reg->pr_reg_list)) list_del(&pr_reg->pr_reg_list); /* @@ -1257,6 +1275,8 @@ static void __core_scsi3_free_registration( */ if (dec_holders) core_scsi3_put_pr_reg(pr_reg); + + spin_unlock(&pr_tmpl->registration_lock); /* * Wait until all reference from any other I_T nexuses for this * *pr_reg have been released. Because list_del() is called above, @@ -1264,13 +1284,18 @@ static void __core_scsi3_free_registration( * count back to zero, and we release *pr_reg. */ while (atomic_read(&pr_reg->pr_res_holders) != 0) { - spin_unlock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] waiting for pr_res_holders\n", tfo->get_fabric_name()); cpu_relax(); - spin_lock(&pr_tmpl->registration_lock); } + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, pr_reg->pr_res_mapped_lun); + if (deve) + clear_bit(DEF_PR_REG_ACTIVE, &deve->deve_flags); + rcu_read_unlock(); + + spin_lock(&pr_tmpl->registration_lock); pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator" " Node: %s%s\n", tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, @@ -1392,12 +1417,14 @@ static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl) static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) { - struct se_lun_acl *lun_acl = se_deve->se_lun_acl; + struct se_lun_acl *lun_acl; struct se_node_acl *nacl; struct se_portal_group *tpg; /* * For nacl->dynamic_node_acl=1 */ + lun_acl = rcu_dereference_check(se_deve->se_lun_acl, + atomic_read(&se_deve->pr_kref.refcount) != 0); if (!lun_acl) return 0; @@ -1409,21 +1436,23 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve) { - struct se_lun_acl *lun_acl = se_deve->se_lun_acl; + struct se_lun_acl *lun_acl; struct se_node_acl *nacl; struct se_portal_group *tpg; /* * For nacl->dynamic_node_acl=1 */ + lun_acl = rcu_dereference_check(se_deve->se_lun_acl, + atomic_read(&se_deve->pr_kref.refcount) != 0); if (!lun_acl) { - atomic_dec_mb(&se_deve->pr_ref_count); + kref_put(&se_deve->pr_kref, target_pr_kref_release); return; } nacl = lun_acl->se_lun_nacl; tpg = nacl->se_tpg; target_undepend_item(&lun_acl->se_lun_group.cg_item); - atomic_dec_mb(&se_deve->pr_ref_count); + kref_put(&se_deve->pr_kref, target_pr_kref_release); } static sense_reason_t @@ -1436,30 +1465,25 @@ core_scsi3_decode_spec_i_port( int aptpl) { struct se_device *dev = cmd->se_dev; - struct se_port *tmp_port; struct se_portal_group *dest_tpg = NULL, *tmp_tpg; struct se_session *se_sess = cmd->se_sess; struct se_node_acl *dest_node_acl = NULL; - struct se_dev_entry *dest_se_deve = NULL, *local_se_deve; + struct se_dev_entry *dest_se_deve = NULL; struct t10_pr_registration *dest_pr_reg, *local_pr_reg, *pr_reg_e; struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe; LIST_HEAD(tid_dest_list); struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp; - const struct target_core_fabric_ops *tmp_tf_ops; - unsigned char *buf; - unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident; + unsigned char *buf, *ptr, proto_ident; + const unsigned char *i_str; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; sense_reason_t ret; u32 tpdl, tid_len = 0; - int dest_local_nexus; u32 dest_rtpi = 0; - local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; /* * Allocate a struct pr_transport_id_holder and setup the - * local_node_acl and local_se_deve pointers and add to - * struct list_head tid_dest_list for add registration - * processing in the loop of tid_dest_list below. + * local_node_acl pointer and add to struct list_head tid_dest_list + * for add registration processing in the loop of tid_dest_list below. */ tidh_new = kzalloc(sizeof(struct pr_transport_id_holder), GFP_KERNEL); if (!tidh_new) { @@ -1469,10 +1493,10 @@ core_scsi3_decode_spec_i_port( INIT_LIST_HEAD(&tidh_new->dest_list); tidh_new->dest_tpg = tpg; tidh_new->dest_node_acl = se_sess->se_node_acl; - tidh_new->dest_se_deve = local_se_deve; local_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, - se_sess->se_node_acl, local_se_deve, l_isid, + se_sess->se_node_acl, cmd->se_lun, + NULL, cmd->orig_fe_lun, l_isid, sa_res_key, all_tg_pt, aptpl); if (!local_pr_reg) { kfree(tidh_new); @@ -1481,10 +1505,10 @@ core_scsi3_decode_spec_i_port( tidh_new->dest_pr_reg = local_pr_reg; /* * The local I_T nexus does not hold any configfs dependances, - * so we set tid_h->dest_local_nexus=1 to prevent the + * so we set tidh_new->dest_se_deve to NULL to prevent the * configfs_undepend_item() calls in the tid_dest_list loops below. */ - tidh_new->dest_local_nexus = 1; + tidh_new->dest_se_deve = NULL; list_add_tail(&tidh_new->dest_list, &tid_dest_list); if (cmd->data_length < 28) { @@ -1525,32 +1549,25 @@ core_scsi3_decode_spec_i_port( ptr = &buf[28]; while (tpdl > 0) { + struct se_lun *dest_lun, *tmp_lun; + proto_ident = (ptr[0] & 0x0f); dest_tpg = NULL; spin_lock(&dev->se_port_lock); - list_for_each_entry(tmp_port, &dev->dev_sep_list, sep_list) { - tmp_tpg = tmp_port->sep_tpg; - if (!tmp_tpg) - continue; - tmp_tf_ops = tmp_tpg->se_tpg_tfo; - if (!tmp_tf_ops) - continue; - if (!tmp_tf_ops->get_fabric_proto_ident || - !tmp_tf_ops->tpg_parse_pr_out_transport_id) - continue; + list_for_each_entry(tmp_lun, &dev->dev_sep_list, lun_dev_link) { + tmp_tpg = tmp_lun->lun_tpg; + /* * Look for the matching proto_ident provided by * the received TransportID */ - tmp_proto_ident = tmp_tf_ops->get_fabric_proto_ident(tmp_tpg); - if (tmp_proto_ident != proto_ident) + if (tmp_tpg->proto_id != proto_ident) continue; - dest_rtpi = tmp_port->sep_rtpi; + dest_rtpi = tmp_lun->lun_rtpi; - i_str = tmp_tf_ops->tpg_parse_pr_out_transport_id( - tmp_tpg, (const char *)ptr, &tid_len, - &iport_ptr); + i_str = target_parse_pr_out_transport_id(tmp_tpg, + (const char *)ptr, &tid_len, &iport_ptr); if (!i_str) continue; @@ -1569,12 +1586,12 @@ core_scsi3_decode_spec_i_port( * from the decoded fabric module specific TransportID * at *i_str. */ - spin_lock_irq(&tmp_tpg->acl_node_lock); + mutex_lock(&tmp_tpg->acl_node_mutex); dest_node_acl = __core_tpg_get_initiator_node_acl( tmp_tpg, i_str); if (dest_node_acl) atomic_inc_mb(&dest_node_acl->acl_pr_ref_count); - spin_unlock_irq(&tmp_tpg->acl_node_lock); + mutex_unlock(&tmp_tpg->acl_node_mutex); if (!dest_node_acl) { core_scsi3_tpg_undepend_item(tmp_tpg); @@ -1644,7 +1661,7 @@ core_scsi3_decode_spec_i_port( if (core_scsi3_lunacl_depend_item(dest_se_deve)) { pr_err("core_scsi3_lunacl_depend_item()" " failed\n"); - atomic_dec_mb(&dest_se_deve->pr_ref_count); + kref_put(&dest_se_deve->pr_kref, target_pr_kref_release); core_scsi3_nodeacl_undepend_item(dest_node_acl); core_scsi3_tpg_undepend_item(dest_tpg); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1652,7 +1669,7 @@ core_scsi3_decode_spec_i_port( } pr_debug("SPC-3 PR SPEC_I_PT: Located %s Node: %s" - " dest_se_deve mapped_lun: %u\n", + " dest_se_deve mapped_lun: %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(), dest_node_acl->initiatorname, dest_se_deve->mapped_lun); @@ -1708,9 +1725,13 @@ core_scsi3_decode_spec_i_port( * and then call __core_scsi3_add_registration() in the * 2nd loop which will never fail. */ + dest_lun = rcu_dereference_check(dest_se_deve->se_lun, + atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + dest_pr_reg = __core_scsi3_alloc_registration(cmd->se_dev, - dest_node_acl, dest_se_deve, iport_ptr, - sa_res_key, all_tg_pt, aptpl); + dest_node_acl, dest_lun, dest_se_deve, + dest_se_deve->mapped_lun, iport_ptr, + sa_res_key, all_tg_pt, aptpl); if (!dest_pr_reg) { core_scsi3_lunacl_undepend_item(dest_se_deve); core_scsi3_nodeacl_undepend_item(dest_node_acl); @@ -1748,7 +1769,6 @@ core_scsi3_decode_spec_i_port( dest_node_acl = tidh->dest_node_acl; dest_se_deve = tidh->dest_se_deve; dest_pr_reg = tidh->dest_pr_reg; - dest_local_nexus = tidh->dest_local_nexus; list_del(&tidh->dest_list); kfree(tidh); @@ -1761,10 +1781,11 @@ core_scsi3_decode_spec_i_port( pr_debug("SPC-3 PR [%s] SPEC_I_PT: Successfully" " registered Transport ID for Node: %s%s Mapped LUN:" - " %u\n", dest_tpg->se_tpg_tfo->get_fabric_name(), - dest_node_acl->initiatorname, i_buf, dest_se_deve->mapped_lun); + " %llu\n", dest_tpg->se_tpg_tfo->get_fabric_name(), + dest_node_acl->initiatorname, i_buf, (dest_se_deve) ? + dest_se_deve->mapped_lun : 0); - if (dest_local_nexus) + if (!dest_se_deve) continue; core_scsi3_lunacl_undepend_item(dest_se_deve); @@ -1785,7 +1806,6 @@ out: dest_node_acl = tidh->dest_node_acl; dest_se_deve = tidh->dest_se_deve; dest_pr_reg = tidh->dest_pr_reg; - dest_local_nexus = tidh->dest_local_nexus; list_del(&tidh->dest_list); kfree(tidh); @@ -1803,7 +1823,7 @@ out: kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); - if (dest_local_nexus) + if (!dest_se_deve) continue; core_scsi3_lunacl_undepend_item(dest_se_deve); @@ -1818,7 +1838,6 @@ static int core_scsi3_update_aptpl_buf( unsigned char *buf, u32 pr_aptpl_buf_len) { - struct se_lun *lun; struct se_portal_group *tpg; struct t10_pr_registration *pr_reg; unsigned char tmp[512], isid_buf[32]; @@ -1837,7 +1856,6 @@ static int core_scsi3_update_aptpl_buf( tmp[0] = '\0'; isid_buf[0] = '\0'; tpg = pr_reg->pr_reg_nacl->se_tpg; - lun = pr_reg->pr_reg_tg_pt_lun; /* * Write out any ISID value to APTPL metadata that was included * in the original registration. @@ -1856,7 +1874,7 @@ static int core_scsi3_update_aptpl_buf( "sa_res_key=%llu\n" "res_holder=1\nres_type=%02x\n" "res_scope=%02x\nres_all_tg_pt=%d\n" - "mapped_lun=%u\n", reg_count, + "mapped_lun=%llu\n", reg_count, tpg->se_tpg_tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_res_type, @@ -1866,7 +1884,7 @@ static int core_scsi3_update_aptpl_buf( snprintf(tmp, 512, "PR_REG_START: %d\n" "initiator_fabric=%s\ninitiator_node=%s\n%s" "sa_res_key=%llu\nres_holder=0\n" - "res_all_tg_pt=%d\nmapped_lun=%u\n", + "res_all_tg_pt=%d\nmapped_lun=%llu\n", reg_count, tpg->se_tpg_tfo->get_fabric_name(), pr_reg->pr_reg_nacl->initiatorname, isid_buf, pr_reg->pr_res_key, pr_reg->pr_reg_all_tg_pt, @@ -1885,11 +1903,12 @@ static int core_scsi3_update_aptpl_buf( * Include information about the associated SCSI target port. */ snprintf(tmp, 512, "target_fabric=%s\ntarget_node=%s\n" - "tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%u\nPR_REG_END:" + "tpgt=%hu\nport_rtpi=%hu\ntarget_lun=%llu\nPR_REG_END:" " %d\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_wwn(tpg), tpg->se_tpg_tfo->tpg_get_tag(tpg), - lun->lun_sep->sep_rtpi, lun->unpacked_lun, reg_count); + pr_reg->tg_pt_sep_rtpi, pr_reg->pr_aptpl_target_lun, + reg_count); if ((len + strlen(tmp) >= pr_aptpl_buf_len)) { pr_err("Unable to update renaming APTPL metadata," @@ -2000,7 +2019,6 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, { struct se_session *se_sess = cmd->se_sess; struct se_device *dev = cmd->se_dev; - struct se_dev_entry *se_deve; struct se_lun *se_lun = cmd->se_lun; struct se_portal_group *se_tpg; struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_reg_tmp; @@ -2014,7 +2032,6 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } se_tpg = se_sess->se_tpg; - se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun]; if (se_tpg->se_tpg_tfo->sess_get_initiator_sid) { memset(&isid_buf[0], 0, PR_REG_ISID_LEN); @@ -2045,7 +2062,8 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, * Logical Unit of the SCSI device server. */ if (core_scsi3_alloc_registration(cmd->se_dev, - se_sess->se_node_acl, se_deve, isid_ptr, + se_sess->se_node_acl, cmd->se_lun, + NULL, cmd->orig_fe_lun, isid_ptr, sa_res_key, all_tg_pt, aptpl, register_type, 0)) { pr_err("Unable to allocate" @@ -2066,7 +2084,6 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, if (ret != 0) return ret; } - return core_scsi3_update_and_write_aptpl(dev, aptpl); } @@ -2180,7 +2197,7 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, &pr_tmpl->registration_list, pr_reg_list) { - core_scsi3_ua_allocate( + target_ua_allocate_lun( pr_reg_p->pr_reg_nacl, pr_reg_p->pr_res_mapped_lun, 0x2A, @@ -2607,7 +2624,7 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, if (pr_reg_p == pr_reg) continue; - core_scsi3_ua_allocate(pr_reg_p->pr_reg_nacl, + target_ua_allocate_lun(pr_reg_p->pr_reg_nacl, pr_reg_p->pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_RELEASED); } @@ -2630,7 +2647,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) struct se_session *se_sess = cmd->se_sess; struct t10_reservation *pr_tmpl = &dev->t10_pr; struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; - u32 pr_res_mapped_lun = 0; + u64 pr_res_mapped_lun = 0; int calling_it_nexus = 0; /* * Locate the existing *pr_reg via struct se_node_acl pointers @@ -2692,7 +2709,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) * additional sense code set to RESERVATIONS PREEMPTED. */ if (!calling_it_nexus) - core_scsi3_ua_allocate(pr_reg_nacl, pr_res_mapped_lun, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_PREEMPTED); } spin_unlock(&pr_tmpl->registration_lock); @@ -2786,7 +2803,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, LIST_HEAD(preempt_and_abort_list); struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; struct t10_reservation *pr_tmpl = &dev->t10_pr; - u32 pr_res_mapped_lun = 0; + u64 pr_res_mapped_lun = 0; int all_reg = 0, calling_it_nexus = 0; bool sa_res_key_unmatched = sa_res_key != 0; int prh_type = 0, prh_scope = 0; @@ -2901,7 +2918,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, NULL, 0); } if (!calling_it_nexus) - core_scsi3_ua_allocate(pr_reg_nacl, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_REGISTRATIONS_PREEMPTED); } @@ -3007,7 +3024,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * persistent reservation and/or registration, with the * additional sense code set to REGISTRATIONS PREEMPTED; */ - core_scsi3_ua_allocate(pr_reg_nacl, pr_res_mapped_lun, 0x2A, + target_ua_allocate_lun(pr_reg_nacl, pr_res_mapped_lun, 0x2A, ASCQ_2AH_REGISTRATIONS_PREEMPTED); } spin_unlock(&pr_tmpl->registration_lock); @@ -3040,7 +3057,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if (calling_it_nexus) continue; - core_scsi3_ua_allocate(pr_reg->pr_reg_nacl, + target_ua_allocate_lun(pr_reg->pr_reg_nacl, pr_reg->pr_res_mapped_lun, 0x2A, ASCQ_2AH_RESERVATIONS_RELEASED); } @@ -3099,15 +3116,14 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, struct se_session *se_sess = cmd->se_sess; struct se_device *dev = cmd->se_dev; struct se_dev_entry *dest_se_deve = NULL; - struct se_lun *se_lun = cmd->se_lun; + struct se_lun *se_lun = cmd->se_lun, *tmp_lun; struct se_node_acl *pr_res_nacl, *pr_reg_nacl, *dest_node_acl = NULL; - struct se_port *se_port; struct se_portal_group *se_tpg, *dest_se_tpg = NULL; const struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops; struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - unsigned char *initiator_str; + const unsigned char *initiator_str; char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN]; u32 tid_len, tmp_tid_len; int new_reg = 0, type, scope, matching_iname; @@ -3186,12 +3202,10 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, } spin_lock(&dev->se_port_lock); - list_for_each_entry(se_port, &dev->dev_sep_list, sep_list) { - if (se_port->sep_rtpi != rtpi) - continue; - dest_se_tpg = se_port->sep_tpg; - if (!dest_se_tpg) + list_for_each_entry(tmp_lun, &dev->dev_sep_list, lun_dev_link) { + if (tmp_lun->lun_rtpi != rtpi) continue; + dest_se_tpg = tmp_lun->lun_tpg; dest_tf_ops = dest_se_tpg->se_tpg_tfo; if (!dest_tf_ops) continue; @@ -3230,23 +3244,16 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, pr_debug("SPC-3 PR REGISTER_AND_MOVE: Extracted Protocol Identifier:" " 0x%02x\n", proto_ident); - if (proto_ident != dest_tf_ops->get_fabric_proto_ident(dest_se_tpg)) { + if (proto_ident != dest_se_tpg->proto_id) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Received" " proto_ident: 0x%02x does not match ident: 0x%02x" " from fabric: %s\n", proto_ident, - dest_tf_ops->get_fabric_proto_ident(dest_se_tpg), + dest_se_tpg->proto_id, dest_tf_ops->get_fabric_name()); ret = TCM_INVALID_PARAMETER_LIST; goto out; } - if (dest_tf_ops->tpg_parse_pr_out_transport_id == NULL) { - pr_err("SPC-3 PR REGISTER_AND_MOVE: Fabric does not" - " containg a valid tpg_parse_pr_out_transport_id" - " function pointer\n"); - ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - goto out; - } - initiator_str = dest_tf_ops->tpg_parse_pr_out_transport_id(dest_se_tpg, + initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, (const char *)&buf[24], &tmp_tid_len, &iport_ptr); if (!initiator_str) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" @@ -3295,12 +3302,12 @@ after_iport_check: /* * Locate the destination struct se_node_acl from the received Transport ID */ - spin_lock_irq(&dest_se_tpg->acl_node_lock); + mutex_lock(&dest_se_tpg->acl_node_mutex); dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg, initiator_str); if (dest_node_acl) atomic_inc_mb(&dest_node_acl->acl_pr_ref_count); - spin_unlock_irq(&dest_se_tpg->acl_node_lock); + mutex_unlock(&dest_se_tpg->acl_node_mutex); if (!dest_node_acl) { pr_err("Unable to locate %s dest_node_acl for" @@ -3337,14 +3344,14 @@ after_iport_check: if (core_scsi3_lunacl_depend_item(dest_se_deve)) { pr_err("core_scsi3_lunacl_depend_item() failed\n"); - atomic_dec_mb(&dest_se_deve->pr_ref_count); + kref_put(&dest_se_deve->pr_kref, target_pr_kref_release); dest_se_deve = NULL; ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto out; } pr_debug("SPC-3 PR REGISTER_AND_MOVE: Located %s node %s LUN" - " ACL for dest_se_deve->mapped_lun: %u\n", + " ACL for dest_se_deve->mapped_lun: %llu\n", dest_tf_ops->get_fabric_name(), dest_node_acl->initiatorname, dest_se_deve->mapped_lun); @@ -3421,13 +3428,17 @@ after_iport_check: dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl, iport_ptr); if (!dest_pr_reg) { - if (core_scsi3_alloc_registration(cmd->se_dev, - dest_node_acl, dest_se_deve, iport_ptr, - sa_res_key, 0, aptpl, 2, 1)) { - spin_unlock(&dev->dev_reservation_lock); + struct se_lun *dest_lun = rcu_dereference_check(dest_se_deve->se_lun, + atomic_read(&dest_se_deve->pr_kref.refcount) != 0); + + spin_unlock(&dev->dev_reservation_lock); + if (core_scsi3_alloc_registration(cmd->se_dev, dest_node_acl, + dest_lun, dest_se_deve, dest_se_deve->mapped_lun, + iport_ptr, sa_res_key, 0, aptpl, 2, 1)) { ret = TCM_INVALID_PARAMETER_LIST; goto out; } + spin_lock(&dev->dev_reservation_lock); dest_pr_reg = __core_scsi3_locate_pr_reg(dev, dest_node_acl, iport_ptr); new_reg = 1; @@ -3883,9 +3894,10 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) struct t10_pr_registration *pr_reg, *pr_reg_tmp; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len; + u32 add_desc_len = 0, add_len = 0; u32 off = 8; /* off into first Full Status descriptor */ int format_code = 0, pr_res_type = 0, pr_res_scope = 0; + int exp_desc_len, desc_len; bool all_reg = false; if (cmd->data_length < 8) { @@ -3930,10 +3942,10 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * Determine expected length of $FABRIC_MOD specific * TransportID full status descriptor.. */ - exp_desc_len = se_tpg->se_tpg_tfo->tpg_get_pr_transport_id_len( - se_tpg, se_nacl, pr_reg, &format_code); - - if ((exp_desc_len + add_len) > cmd->data_length) { + exp_desc_len = target_get_pr_transport_id_len(se_nacl, pr_reg, + &format_code); + if (exp_desc_len < 0 || + exp_desc_len + add_len > cmd->data_length) { pr_warn("SPC-3 PRIN READ_FULL_STATUS ran" " out of buffer: %d\n", cmd->data_length); spin_lock(&pr_tmpl->registration_lock); @@ -3990,21 +4002,26 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * IDENTIFIER field are not defined by this standard. */ if (!pr_reg->pr_reg_all_tg_pt) { - struct se_port *port = pr_reg->pr_reg_tg_pt_lun->lun_sep; + u16 sep_rtpi = pr_reg->tg_pt_sep_rtpi; - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((sep_rtpi >> 8) & 0xff); + buf[off++] = (sep_rtpi & 0xff); } else off += 2; /* Skip over RELATIVE TARGET PORT IDENTIFIER */ + buf[off+4] = se_tpg->proto_id; + /* - * Now, have the $FABRIC_MOD fill in the protocol identifier + * Now, have the $FABRIC_MOD fill in the transport ID. */ - desc_len = se_tpg->se_tpg_tfo->tpg_get_pr_transport_id(se_tpg, - se_nacl, pr_reg, &format_code, &buf[off+4]); + desc_len = target_get_pr_transport_id(se_nacl, pr_reg, + &format_code, &buf[off+4]); spin_lock(&pr_tmpl->registration_lock); atomic_dec_mb(&pr_reg->pr_res_holders); + + if (desc_len < 0) + break; /* * Set the ADDITIONAL DESCRIPTOR LENGTH */ diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index 749fd7bb7510..e3d26e9126a0 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -56,11 +56,11 @@ extern sense_reason_t target_scsi2_reservation_release(struct se_cmd *); extern sense_reason_t target_scsi2_reservation_reserve(struct se_cmd *); extern int core_scsi3_alloc_aptpl_registration( struct t10_reservation *, u64, - unsigned char *, unsigned char *, u32, - unsigned char *, u16, u32, int, int, u8); + unsigned char *, unsigned char *, u64, + unsigned char *, u16, u64, int, int, u8); extern int core_scsi3_check_aptpl_registration(struct se_device *, struct se_portal_group *, struct se_lun *, - struct se_node_acl *, u32); + struct se_node_acl *, u64); extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *, struct se_node_acl *); extern void core_scsi3_free_all_registrations(struct se_device *); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 26581e215141..08e9084ee615 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -42,9 +42,9 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> -#include <target/target_core_backend_configfs.h> #include "target_core_alua.h" +#include "target_core_internal.h" #include "target_core_pscsi.h" #define ISPRINT(a) ((a >= ' ') && (a <= '~')) @@ -54,8 +54,6 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev) return container_of(dev, struct pscsi_dev_virt, dev); } -static struct se_subsystem_api pscsi_template; - static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); static void pscsi_req_done(struct request *, int); @@ -80,7 +78,7 @@ static int pscsi_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM SCSI HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - PSCSI_VERSION, TARGET_CORE_MOD_VERSION); + PSCSI_VERSION, TARGET_CORE_VERSION); pr_debug("CORE_HBA[%d] - Attached SCSI HBA to Generic\n", hba->hba_id); @@ -579,6 +577,14 @@ static int pscsi_configure_device(struct se_device *dev) return -ENODEV; } +static void pscsi_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); + + kfree(pdv); +} + static void pscsi_free_device(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); @@ -610,8 +616,7 @@ static void pscsi_free_device(struct se_device *dev) pdv->pdv_sd = NULL; } - - kfree(pdv); + call_rcu(&dev->rcu_head, pscsi_dev_call_rcu); } static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg, @@ -635,12 +640,14 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg, * Hack to make sure that Write-Protect modepage is set if R/O mode is * forced. */ - if (!cmd->se_deve || !cmd->data_length) + if (!cmd->data_length) goto after_mode_sense; if (((cdb[0] == MODE_SENSE) || (cdb[0] == MODE_SENSE_10)) && (status_byte(result) << 1) == SAM_STAT_GOOD) { - if (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) { + bool read_only = target_lun_is_rdonly(cmd); + + if (read_only) { unsigned char *buf; buf = transport_kmap_data_sg(cmd); @@ -1116,27 +1123,7 @@ static void pscsi_req_done(struct request *req, int uptodate) kfree(pt); } -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type); -TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size); -TB_DEV_ATTR_RO(pscsi, hw_block_size); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors); -TB_DEV_ATTR_RO(pscsi, hw_max_sectors); - -DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth); -TB_DEV_ATTR_RO(pscsi, hw_queue_depth); - -static struct configfs_attribute *pscsi_backend_dev_attrs[] = { - &pscsi_dev_attrib_hw_pi_prot_type.attr, - &pscsi_dev_attrib_hw_block_size.attr, - &pscsi_dev_attrib_hw_max_sectors.attr, - &pscsi_dev_attrib_hw_queue_depth.attr, - NULL, -}; - -static struct se_subsystem_api pscsi_template = { +static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, @@ -1152,21 +1139,17 @@ static struct se_subsystem_api pscsi_template = { .show_configfs_dev_params = pscsi_show_configfs_dev_params, .get_device_type = pscsi_get_device_type, .get_blocks = pscsi_get_blocks, + .tb_dev_attrib_attrs = passthrough_attrib_attrs, }; static int __init pscsi_module_init(void) { - struct target_backend_cits *tbc = &pscsi_template.tb_cits; - - target_core_setup_sub_cits(&pscsi_template); - tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs; - - return transport_subsystem_register(&pscsi_template); + return transport_backend_register(&pscsi_ops); } static void __exit pscsi_module_exit(void) { - transport_subsystem_release(&pscsi_template); + target_backend_unregister(&pscsi_ops); } MODULE_DESCRIPTION("TCM PSCSI subsystem plugin"); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index b2d8f6f91633..4703f403f31c 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -33,7 +33,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> -#include <target/target_core_backend_configfs.h> #include "target_core_rd.h" @@ -42,10 +41,6 @@ static inline struct rd_dev *RD_DEV(struct se_device *dev) return container_of(dev, struct rd_dev, dev); } -/* rd_attach_hba(): (Part of se_subsystem_api_t template) - * - * - */ static int rd_attach_hba(struct se_hba *hba, u32 host_id) { struct rd_host *rd_host; @@ -62,7 +57,7 @@ static int rd_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM Ramdisk HBA Driver %s on" " Generic Target Core Stack %s\n", hba->hba_id, - RD_HBA_VERSION, TARGET_CORE_MOD_VERSION); + RD_HBA_VERSION, TARGET_CORE_VERSION); return 0; } @@ -354,12 +349,20 @@ fail: return ret; } +static void rd_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct rd_dev *rd_dev = RD_DEV(dev); + + kfree(rd_dev); +} + static void rd_free_device(struct se_device *dev) { struct rd_dev *rd_dev = RD_DEV(dev); rd_release_device_space(rd_dev); - kfree(rd_dev); + call_rcu(&dev->rcu_head, rd_dev_call_rcu); } static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) @@ -402,10 +405,7 @@ static struct rd_dev_sg_table *rd_get_prot_table(struct rd_dev *rd_dev, u32 page return NULL; } -typedef sense_reason_t (*dif_verify)(struct se_cmd *, sector_t, unsigned int, - unsigned int, struct scatterlist *, int); - -static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify) +static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, bool is_read) { struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); @@ -465,7 +465,16 @@ static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify) #endif /* !CONFIG_ARCH_HAS_SG_CHAIN */ - rc = dif_verify(cmd, cmd->t_task_lba, sectors, 0, prot_sg, prot_offset); + if (is_read) + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + prot_sg, prot_offset); + else + rc = sbc_dif_verify(cmd, cmd->t_task_lba, sectors, 0, + cmd->t_prot_sg, 0); + + if (!rc) + sbc_dif_copy_prot(cmd, sectors, is_read, prot_sg, prot_offset); + if (need_to_release) kfree(prot_sg); @@ -511,7 +520,7 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type && data_direction == DMA_TO_DEVICE) { - rc = rd_do_prot_rw(cmd, sbc_dif_verify_write); + rc = rd_do_prot_rw(cmd, false); if (rc) return rc; } @@ -579,7 +588,7 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type && data_direction == DMA_FROM_DEVICE) { - rc = rd_do_prot_rw(cmd, sbc_dif_verify_read); + rc = rd_do_prot_rw(cmd, true); if (rc) return rc; } @@ -693,42 +702,7 @@ rd_parse_cdb(struct se_cmd *cmd) return sbc_parse_cdb(cmd, &rd_sbc_ops); } -DEF_TB_DEFAULT_ATTRIBS(rd_mcp); - -static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = { - &rd_mcp_dev_attrib_emulate_model_alias.attr, - &rd_mcp_dev_attrib_emulate_dpo.attr, - &rd_mcp_dev_attrib_emulate_fua_write.attr, - &rd_mcp_dev_attrib_emulate_fua_read.attr, - &rd_mcp_dev_attrib_emulate_write_cache.attr, - &rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr, - &rd_mcp_dev_attrib_emulate_tas.attr, - &rd_mcp_dev_attrib_emulate_tpu.attr, - &rd_mcp_dev_attrib_emulate_tpws.attr, - &rd_mcp_dev_attrib_emulate_caw.attr, - &rd_mcp_dev_attrib_emulate_3pc.attr, - &rd_mcp_dev_attrib_pi_prot_type.attr, - &rd_mcp_dev_attrib_hw_pi_prot_type.attr, - &rd_mcp_dev_attrib_pi_prot_format.attr, - &rd_mcp_dev_attrib_enforce_pr_isids.attr, - &rd_mcp_dev_attrib_is_nonrot.attr, - &rd_mcp_dev_attrib_emulate_rest_reord.attr, - &rd_mcp_dev_attrib_force_pr_aptpl.attr, - &rd_mcp_dev_attrib_hw_block_size.attr, - &rd_mcp_dev_attrib_block_size.attr, - &rd_mcp_dev_attrib_hw_max_sectors.attr, - &rd_mcp_dev_attrib_optimal_sectors.attr, - &rd_mcp_dev_attrib_hw_queue_depth.attr, - &rd_mcp_dev_attrib_queue_depth.attr, - &rd_mcp_dev_attrib_max_unmap_lba_count.attr, - &rd_mcp_dev_attrib_max_unmap_block_desc_count.attr, - &rd_mcp_dev_attrib_unmap_granularity.attr, - &rd_mcp_dev_attrib_unmap_granularity_alignment.attr, - &rd_mcp_dev_attrib_max_write_same_len.attr, - NULL, -}; - -static struct se_subsystem_api rd_mcp_template = { +static const struct target_backend_ops rd_mcp_ops = { .name = "rd_mcp", .inquiry_prod = "RAMDISK-MCP", .inquiry_rev = RD_MCP_VERSION, @@ -744,25 +718,15 @@ static struct se_subsystem_api rd_mcp_template = { .get_blocks = rd_get_blocks, .init_prot = rd_init_prot, .free_prot = rd_free_prot, + .tb_dev_attrib_attrs = sbc_attrib_attrs, }; int __init rd_module_init(void) { - struct target_backend_cits *tbc = &rd_mcp_template.tb_cits; - int ret; - - target_core_setup_sub_cits(&rd_mcp_template); - tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs; - - ret = transport_subsystem_register(&rd_mcp_template); - if (ret < 0) { - return ret; - } - - return 0; + return transport_backend_register(&rd_mcp_ops); } void rd_module_exit(void) { - transport_subsystem_release(&rd_mcp_template); + target_backend_unregister(&rd_mcp_ops); } diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 43719b393ca9..e318ddbe15da 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -38,6 +38,7 @@ static sense_reason_t sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool); +static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd); static sense_reason_t sbc_emulate_readcapacity(struct se_cmd *cmd) @@ -177,6 +178,23 @@ sector_t sbc_get_write_same_sectors(struct se_cmd *cmd) EXPORT_SYMBOL(sbc_get_write_same_sectors); static sense_reason_t +sbc_execute_write_same_unmap(struct se_cmd *cmd) +{ + struct sbc_ops *ops = cmd->protocol_data; + sector_t nolb = sbc_get_write_same_sectors(cmd); + sense_reason_t ret; + + if (nolb) { + ret = ops->execute_unmap(cmd, cmd->t_task_lba, nolb); + if (ret) + return ret; + } + + target_complete_cmd(cmd, GOOD); + return 0; +} + +static sense_reason_t sbc_emulate_noop(struct se_cmd *cmd) { target_complete_cmd(cmd, GOOD); @@ -299,7 +317,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o * translated into block discard requests within backend code. */ if (flags[0] & 0x08) { - if (!ops->execute_write_same_unmap) + if (!ops->execute_unmap) return TCM_UNSUPPORTED_SCSI_OPCODE; if (!dev->dev_attrib.emulate_tpws) { @@ -307,7 +325,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o " has emulate_tpws disabled\n"); return TCM_UNSUPPORTED_SCSI_OPCODE; } - cmd->execute_cmd = ops->execute_write_same_unmap; + cmd->execute_cmd = sbc_execute_write_same_unmap; return 0; } if (!ops->execute_write_same) @@ -381,7 +399,9 @@ out: static sense_reason_t sbc_execute_rw(struct se_cmd *cmd) { - return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, + struct sbc_ops *ops = cmd->protocol_data; + + return ops->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, cmd->data_direction); } @@ -560,6 +580,7 @@ out: static sense_reason_t sbc_compare_and_write(struct se_cmd *cmd) { + struct sbc_ops *ops = cmd->protocol_data; struct se_device *dev = cmd->se_dev; sense_reason_t ret; int rc; @@ -579,7 +600,7 @@ sbc_compare_and_write(struct se_cmd *cmd) */ cmd->data_length = cmd->t_task_nolb * dev->dev_attrib.block_size; - ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, + ret = ops->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, DMA_FROM_DEVICE); if (ret) { cmd->transport_complete_callback = NULL; @@ -738,14 +759,15 @@ static int sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb) { if (cdb[1] & 0x10) { - if (!dev->dev_attrib.emulate_dpo) { + /* see explanation in spc_emulate_modesense */ + if (!target_check_fua(dev)) { pr_err("Got CDB: 0x%02x with DPO bit set, but device" " does not advertise support for DPO\n", cdb[0]); return -EINVAL; } } if (cdb[1] & 0x8) { - if (!dev->dev_attrib.emulate_fua_write || !se_dev_check_wce(dev)) { + if (!target_check_fua(dev)) { pr_err("Got CDB: 0x%02x with FUA bit set, but device" " does not advertise support for FUA write\n", cdb[0]); @@ -765,12 +787,13 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) u32 sectors = 0; sense_reason_t ret; + cmd->protocol_data = ops; + switch (cdb[0]) { case READ_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_10: @@ -785,7 +808,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_12: @@ -800,7 +822,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case READ_16: @@ -815,14 +836,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_10: @@ -838,7 +857,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_12: @@ -853,7 +871,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case WRITE_16: @@ -868,7 +885,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; break; case XDWRITEREAD_10: @@ -886,7 +902,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) /* * Setup BIDI XOR callback to be run after I/O completion. */ - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; break; @@ -910,7 +925,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Setup BIDI XOR callback to be run during after I/O * completion. */ - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; break; @@ -954,7 +968,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd->t_task_lba = get_unaligned_be64(&cdb[2]); cmd->t_task_nolb = sectors; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE; - cmd->execute_rw = ops->execute_rw; cmd->execute_cmd = sbc_compare_and_write; cmd->transport_complete_callback = compare_and_write_callback; break; @@ -1004,7 +1017,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return TCM_UNSUPPORTED_SCSI_OPCODE; } size = get_unaligned_be16(&cdb[7]); - cmd->execute_cmd = ops->execute_unmap; + cmd->execute_cmd = sbc_execute_unmap; break; case WRITE_SAME_16: sectors = transport_get_sectors_16(cdb); @@ -1092,12 +1105,10 @@ u32 sbc_get_device_type(struct se_device *dev) } EXPORT_SYMBOL(sbc_get_device_type); -sense_reason_t -sbc_execute_unmap(struct se_cmd *cmd, - sense_reason_t (*do_unmap_fn)(struct se_cmd *, void *, - sector_t, sector_t), - void *priv) +static sense_reason_t +sbc_execute_unmap(struct se_cmd *cmd) { + struct sbc_ops *ops = cmd->protocol_data; struct se_device *dev = cmd->se_dev; unsigned char *buf, *ptr = NULL; sector_t lba; @@ -1161,7 +1172,7 @@ sbc_execute_unmap(struct se_cmd *cmd, goto err; } - ret = do_unmap_fn(cmd, priv, lba, range); + ret = ops->execute_unmap(cmd, lba, range); if (ret) goto err; @@ -1175,34 +1186,56 @@ err: target_complete_cmd(cmd, GOOD); return ret; } -EXPORT_SYMBOL(sbc_execute_unmap); void sbc_dif_generate(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = cmd->t_prot_sg; + struct scatterlist *dsg = cmd->t_data_sg, *psg; sector_t sector = cmd->t_task_lba; void *daddr, *paddr; int i, j, offset = 0; + unsigned int block_size = dev->dev_attrib.block_size; - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) { paddr = kmap_atomic(sg_page(psg)) + psg->offset; + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { + for (j = 0; j < psg->length; + j += sizeof(struct se_dif_v1_tuple)) { + __u16 crc; + unsigned int avail; + + if (offset >= dsg->length) { + offset -= dsg->length; + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + } - if (offset >= psg->length) { - kunmap_atomic(paddr); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; + sdt = paddr + j; + avail = min(block_size, dsg->length - offset); + crc = crc_t10dif(daddr + offset, avail); + if (avail < block_size) { + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + offset = block_size - avail; + crc = crc_t10dif_update(crc, daddr, offset); + } else { + offset += block_size; } - sdt = paddr + offset; - sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j, - dev->dev_attrib.block_size)); + sdt->guard_tag = cpu_to_be16(crc); if (cmd->prot_type == TARGET_DIF_TYPE1_PROT) sdt->ref_tag = cpu_to_be32(sector & 0xffffffff); sdt->app_tag = 0; @@ -1215,26 +1248,23 @@ sbc_dif_generate(struct se_cmd *cmd) be32_to_cpu(sdt->ref_tag)); sector++; - offset += sizeof(struct se_dif_v1_tuple); } - kunmap_atomic(paddr); - kunmap_atomic(daddr); + kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); } } static sense_reason_t sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt, - const void *p, sector_t sector, unsigned int ei_lba) + __u16 crc, sector_t sector, unsigned int ei_lba) { - struct se_device *dev = cmd->se_dev; - int block_size = dev->dev_attrib.block_size; __be16 csum; if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD)) goto check_ref; - csum = cpu_to_be16(crc_t10dif(p, block_size)); + csum = cpu_to_be16(crc); if (sdt->guard_tag != csum) { pr_err("DIFv1 checksum failed on sector %llu guard tag 0x%04x" @@ -1266,9 +1296,8 @@ check_ref: return 0; } -static void -sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, - struct scatterlist *sg, int sg_off) +void sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, + struct scatterlist *sg, int sg_off) { struct se_device *dev = cmd->se_dev; struct scatterlist *psg; @@ -1300,100 +1329,54 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, copied += len; psg_len -= len; + kunmap_atomic(addr - sg->offset - offset); + if (offset >= sg->length) { sg = sg_next(sg); offset = 0; } - kunmap_atomic(addr); } - kunmap_atomic(paddr); + kunmap_atomic(paddr - psg->offset); } } +EXPORT_SYMBOL(sbc_dif_copy_prot); sense_reason_t -sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) +sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors, + unsigned int ei_lba, struct scatterlist *psg, int psg_off) { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = cmd->t_prot_sg; + struct scatterlist *dsg = cmd->t_data_sg; sector_t sector = start; void *daddr, *paddr; - int i, j, offset = 0; + int i; sense_reason_t rc; + int dsg_off = 0; + unsigned int block_size = dev->dev_attrib.block_size; - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + for (; psg && sector < start + sectors; psg = sg_next(psg)) { paddr = kmap_atomic(sg_page(psg)) + psg->offset; - - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { - - if (offset >= psg->length) { - kunmap_atomic(paddr); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; - } - - sdt = paddr + offset; - - pr_debug("DIF WRITE sector: %llu guard_tag: 0x%04x" - " app_tag: 0x%04x ref_tag: %u\n", - (unsigned long long)sector, sdt->guard_tag, - sdt->app_tag, be32_to_cpu(sdt->ref_tag)); - - rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector, - ei_lba); - if (rc) { - kunmap_atomic(paddr); - kunmap_atomic(daddr); - cmd->bad_sector = sector; - return rc; - } - - sector++; - ei_lba++; - offset += sizeof(struct se_dif_v1_tuple); - } - - kunmap_atomic(paddr); - kunmap_atomic(daddr); - } - if (!sg) - return 0; - - sbc_dif_copy_prot(cmd, sectors, false, sg, sg_off); - - return 0; -} -EXPORT_SYMBOL(sbc_dif_verify_write); - -static sense_reason_t -__sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) -{ - struct se_device *dev = cmd->se_dev; - struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg, *psg = sg; - sector_t sector = start; - void *daddr, *paddr; - int i, j, offset = sg_off; - sense_reason_t rc; - - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - paddr = kmap_atomic(sg_page(psg)) + sg->offset; - - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { - if (offset >= psg->length) { - kunmap_atomic(paddr); - psg = sg_next(psg); - paddr = kmap_atomic(sg_page(psg)) + psg->offset; - offset = 0; + for (i = psg_off; i < psg->length && + sector < start + sectors; + i += sizeof(struct se_dif_v1_tuple)) { + __u16 crc; + unsigned int avail; + + if (dsg_off >= dsg->length) { + dsg_off -= dsg->length; + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return 0; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; } - sdt = paddr + offset; + sdt = paddr + i; pr_debug("DIF READ sector: %llu guard_tag: 0x%04x" " app_tag: 0x%04x ref_tag: %u\n", @@ -1401,53 +1384,43 @@ __sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, sdt->app_tag, be32_to_cpu(sdt->ref_tag)); if (sdt->app_tag == cpu_to_be16(0xffff)) { - sector++; - offset += sizeof(struct se_dif_v1_tuple); - continue; + dsg_off += block_size; + goto next; + } + + avail = min(block_size, dsg->length - dsg_off); + crc = crc_t10dif(daddr + dsg_off, avail); + if (avail < block_size) { + kunmap_atomic(daddr - dsg->offset); + dsg = sg_next(dsg); + if (!dsg) { + kunmap_atomic(paddr - psg->offset); + return 0; + } + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; + dsg_off = block_size - avail; + crc = crc_t10dif_update(crc, daddr, dsg_off); + } else { + dsg_off += block_size; } - rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector, - ei_lba); + rc = sbc_dif_v1_verify(cmd, sdt, crc, sector, ei_lba); if (rc) { - kunmap_atomic(paddr); - kunmap_atomic(daddr); + kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); cmd->bad_sector = sector; return rc; } - +next: sector++; ei_lba++; - offset += sizeof(struct se_dif_v1_tuple); } - kunmap_atomic(paddr); - kunmap_atomic(daddr); + psg_off = 0; + kunmap_atomic(daddr - dsg->offset); + kunmap_atomic(paddr - psg->offset); } return 0; } - -sense_reason_t -sbc_dif_read_strip(struct se_cmd *cmd) -{ - struct se_device *dev = cmd->se_dev; - u32 sectors = cmd->prot_length / dev->prot_length; - - return __sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, 0, - cmd->t_prot_sg, 0); -} - -sense_reason_t -sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, - unsigned int ei_lba, struct scatterlist *sg, int sg_off) -{ - sense_reason_t rc; - - rc = __sbc_dif_verify_read(cmd, start, sectors, ei_lba, sg, sg_off); - if (rc) - return rc; - - sbc_dif_copy_prot(cmd, sectors, true, sg, sg_off); - return 0; -} -EXPORT_SYMBOL(sbc_dif_verify_read); +EXPORT_SYMBOL(sbc_dif_verify); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 52ea640274f4..b0744433315a 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -38,10 +38,9 @@ #include "target_core_ua.h" #include "target_core_xcopy.h" -static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) +static void spc_fill_alua_data(struct se_lun *lun, unsigned char *buf) { struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; /* * Set SCCS for MAINTENANCE_IN + REPORT_TARGET_PORT_GROUPS. @@ -54,17 +53,11 @@ static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) * * See spc4r17 section 6.4.2 Table 135 */ - if (!port) - return; - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - return; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (tg_pt_gp) buf[5] |= tg_pt_gp->tg_pt_gp_alua_access_type; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); } sense_reason_t @@ -95,7 +88,7 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) /* * Enable SCCS and TPGS fields for Emulated ALUA */ - spc_fill_alua_data(lun->lun_sep, buf); + spc_fill_alua_data(lun, buf); /* * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY @@ -182,11 +175,9 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; struct se_lun *lun = cmd->se_lun; - struct se_port *port = NULL; struct se_portal_group *tpg = NULL; struct t10_alua_lu_gp_member *lu_gp_mem; struct t10_alua_tg_pt_gp *tg_pt_gp; - struct t10_alua_tg_pt_gp_member *tg_pt_gp_mem; unsigned char *prod = &dev->t10_wwn.model[0]; u32 prod_len; u32 unit_serial_len, off = 0; @@ -268,18 +259,15 @@ check_t10_vend_desc: /* Header size for Designation descriptor */ len += (id_len + 4); off += (id_len + 4); - /* - * struct se_port is only set for INQUIRY VPD=1 through $FABRIC_MOD - */ - port = lun->lun_sep; - if (port) { + + if (1) { struct t10_alua_lu_gp *lu_gp; u32 padding, scsi_name_len, scsi_target_len; u16 lu_gp_id = 0; u16 tg_pt_gp_id = 0; u16 tpgt; - tpg = port->sep_tpg; + tpg = lun->lun_tpg; /* * Relative target port identifer, see spc4r17 * section 7.7.3.7 @@ -287,8 +275,7 @@ check_t10_vend_desc: * Get the PROTOCOL IDENTIFIER as defined by spc4r17 * section 7.5.1 Table 362 */ - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x1; /* CODE SET == Binary */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -300,8 +287,8 @@ check_t10_vend_desc: /* Skip over Obsolete field in RTPI payload * in Table 472 */ off += 2; - buf[off++] = ((port->sep_rtpi >> 8) & 0xff); - buf[off++] = (port->sep_rtpi & 0xff); + buf[off++] = ((lun->lun_rtpi >> 8) & 0xff); + buf[off++] = (lun->lun_rtpi & 0xff); len += 8; /* Header size + Designation descriptor */ /* * Target port group identifier, see spc4r17 @@ -310,21 +297,16 @@ check_t10_vend_desc: * Get the PROTOCOL IDENTIFIER as defined by spc4r17 * section 7.5.1 Table 362 */ - tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; - if (!tg_pt_gp_mem) - goto check_lu_gp; - - spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); - tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; + spin_lock(&lun->lun_tg_pt_gp_lock); + tg_pt_gp = lun->lun_tg_pt_gp; if (!tg_pt_gp) { - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); goto check_lu_gp; } tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id; - spin_unlock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); + spin_unlock(&lun->lun_tg_pt_gp_lock); - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x1; /* CODE SET == Binary */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -372,8 +354,7 @@ check_lu_gp: * section 7.5.1 Table 362 */ check_scsi_name: - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x3; /* CODE SET == UTF-8 */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target port: 01b */ @@ -413,8 +394,7 @@ check_scsi_name: /* * Target device designator */ - buf[off] = - (tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4); + buf[off] = tpg->proto_id << 4; buf[off++] |= 0x3; /* CODE SET == UTF-8 */ buf[off] = 0x80; /* Set PIV=1 */ /* Set ASSOCIATION == target device: 10b */ @@ -482,7 +462,7 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf) buf[5] = 0x07; /* If WriteCache emulation is enabled, set V_SUP */ - if (se_dev_check_wce(dev)) + if (target_check_wce(dev)) buf[6] = 0x01; /* If an LBA map is present set R_SUP */ spin_lock(&cmd->se_dev->t10_alua.lba_map_lock); @@ -699,7 +679,7 @@ static sense_reason_t spc_emulate_inquiry(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - struct se_portal_group *tpg = cmd->se_lun->lun_sep->sep_tpg; + struct se_portal_group *tpg = cmd->se_lun->lun_tpg; unsigned char *rbuf; unsigned char *cdb = cmd->t_task_cdb; unsigned char *buf; @@ -713,7 +693,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - if (dev == tpg->tpg_virt_lun0.lun_se_dev) + if (dev == rcu_access_pointer(tpg->tpg_virt_lun0->lun_se_dev)) buf[0] = 0x3f; /* Not connected */ else buf[0] = dev->transport->get_device_type(dev); @@ -889,7 +869,7 @@ static int spc_modesense_caching(struct se_cmd *cmd, u8 pc, u8 *p) if (pc == 1) goto out; - if (se_dev_check_wce(dev)) + if (target_check_wce(dev)) p[2] = 0x04; /* Write Cache Enable */ p[12] = 0x20; /* Disabled Read Ahead */ @@ -986,6 +966,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) int length = 0; int ret; int i; + bool read_only = target_lun_is_rdonly(cmd);; memset(buf, 0, SE_MODE_PAGE_BUF); @@ -996,13 +977,15 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) length = ten ? 3 : 2; /* DEVICE-SPECIFIC PARAMETER */ - if ((cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) || - (cmd->se_deve && - (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY))) + if ((cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) || read_only) spc_modesense_write_protect(&buf[length], type); - if ((se_dev_check_wce(dev)) && - (dev->dev_attrib.emulate_fua_write > 0)) + /* + * SBC only allows us to enable FUA and DPO together. Fortunately + * DPO is explicitly specified as a hint, so a noop is a perfectly + * valid implementation. + */ + if (target_check_fua(dev)) spc_modesense_dpofua(&buf[length], type); ++length; @@ -1212,8 +1195,9 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) { struct se_dev_entry *deve; struct se_session *sess = cmd->se_sess; + struct se_node_acl *nacl; unsigned char *buf; - u32 lun_count = 0, offset = 8, i; + u32 lun_count = 0, offset = 8; if (cmd->data_length < 16) { pr_warn("REPORT LUNS allocation length %u too small\n", @@ -1235,12 +1219,10 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) lun_count = 1; goto done; } + nacl = sess->se_node_acl; - spin_lock_irq(&sess->se_node_acl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = sess->se_node_acl->device_list[i]; - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { /* * We determine the correct LUN LIST LENGTH even once we * have reached the initial allocation length. @@ -1253,7 +1235,7 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd) int_to_scsilun(deve->mapped_lun, (struct scsi_lun *)&buf[offset]); offset += 8; } - spin_unlock_irq(&sess->se_node_acl->device_list_lock); + rcu_read_unlock(); /* * See SPC3 r07, page 159. diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 40f6c1378041..20ed5d2e151a 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -37,7 +37,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "target_core_internal.h" @@ -104,7 +103,7 @@ static ssize_t target_stat_scsi_dev_show_attr_ports( struct se_device *dev = container_of(sgrps, struct se_device, dev_stat_grps); - return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_port_count); + return snprintf(page, PAGE_SIZE, "%u\n", dev->export_count); } DEV_STAT_SCSI_DEV_ATTR_RO(ports); @@ -540,20 +539,14 @@ static ssize_t target_stat_scsi_port_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_device *dev = lun->lun_se_dev; - struct se_hba *hba; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(inst); @@ -562,18 +555,14 @@ static ssize_t target_stat_scsi_port_show_attr_dev( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_device *dev = lun->lun_se_dev; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(dev); @@ -582,17 +571,14 @@ static ssize_t target_stat_scsi_port_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(indx); @@ -601,21 +587,14 @@ static ssize_t target_stat_scsi_port_show_attr_role( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - ssize_t ret; - - if (!dev) - return -ENODEV; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(role); @@ -624,18 +603,16 @@ static ssize_t target_stat_scsi_port_show_attr_busy_count( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) { + /* FIXME: scsiPortBusyStatuses */ + ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - /* FIXME: scsiPortBusyStatuses */ - ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_PORT_ATTR_RO(busy_count); @@ -683,20 +660,14 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_hba *hba; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(inst); @@ -705,18 +676,14 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_dev( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(dev); @@ -725,17 +692,14 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_rtpi); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(indx); @@ -744,21 +708,17 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_name( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - - ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", - tpg->se_tpg_tfo->get_fabric_name(), sep->sep_index); - spin_unlock(&lun->lun_sep_lock); + struct se_portal_group *tpg = lun->lun_tpg; + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n", + tpg->se_tpg_tfo->get_fabric_name(), + lun->lun_rtpi); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(name); @@ -767,22 +727,17 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_port_index( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - - ret = snprintf(page, PAGE_SIZE, "%s%s%d\n", - tpg->se_tpg_tfo->tpg_get_wwn(tpg), "+t+", - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&lun->lun_sep_lock); + struct se_portal_group *tpg = lun->lun_tpg; + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%s%s%d\n", + tpg->se_tpg_tfo->tpg_get_wwn(tpg), "+t+", + tpg->se_tpg_tfo->tpg_get_tag(tpg)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(port_index); @@ -791,18 +746,15 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_in_cmds( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&lun->lun_stats.cmd_pdus)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(in_cmds); @@ -811,19 +763,15 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_write_mbytes( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(sep->sep_stats.rx_data_octets >> 20)); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(write_mbytes); @@ -832,19 +780,15 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_read_mbytes( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(sep->sep_stats.tx_data_octets >> 20)); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(read_mbytes); @@ -853,19 +797,16 @@ static ssize_t target_stat_scsi_tgt_port_show_attr_hs_in_cmds( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) { + /* FIXME: scsiTgtPortHsInCommands */ + ret = snprintf(page, PAGE_SIZE, "%u\n", 0); } - - /* FIXME: scsiTgtPortHsInCommands */ - ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TGT_PORT_ATTR_RO(hs_in_cmds); @@ -919,21 +860,14 @@ static ssize_t target_stat_scsi_transport_show_attr_inst( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_hba *hba; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - - hba = dev->se_hba; - ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", dev->hba_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(inst); @@ -942,21 +876,18 @@ static ssize_t target_stat_scsi_transport_show_attr_device( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) { + /* scsiTransportType */ + ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", + tpg->se_tpg_tfo->get_fabric_name()); } - tpg = sep->sep_tpg; - /* scsiTransportType */ - ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n", - tpg->se_tpg_tfo->get_fabric_name()); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(device); @@ -965,20 +896,16 @@ static ssize_t target_stat_scsi_transport_show_attr_indx( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_port *sep; - struct se_portal_group *tpg; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; - } - tpg = sep->sep_tpg; - ret = snprintf(page, PAGE_SIZE, "%u\n", - tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock(&lun->lun_sep_lock); + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", + tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(indx); @@ -987,26 +914,22 @@ static ssize_t target_stat_scsi_transport_show_attr_dev_name( struct se_port_stat_grps *pgrps, char *page) { struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps); - struct se_device *dev = lun->lun_se_dev; - struct se_port *sep; - struct se_portal_group *tpg; + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; struct t10_wwn *wwn; - ssize_t ret; - - spin_lock(&lun->lun_sep_lock); - sep = lun->lun_sep; - if (!sep) { - spin_unlock(&lun->lun_sep_lock); - return -ENODEV; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) { + wwn = &dev->t10_wwn; + /* scsiTransportDevName */ + ret = snprintf(page, PAGE_SIZE, "%s+%s\n", + tpg->se_tpg_tfo->tpg_get_wwn(tpg), + (strlen(wwn->unit_serial)) ? wwn->unit_serial : + wwn->vendor); } - tpg = sep->sep_tpg; - wwn = &dev->t10_wwn; - /* scsiTransportDevName */ - ret = snprintf(page, PAGE_SIZE, "%s+%s\n", - tpg->se_tpg_tfo->tpg_get_wwn(tpg), - (strlen(wwn->unit_serial)) ? wwn->unit_serial : - wwn->vendor); - spin_unlock(&lun->lun_sep_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_TRANSPORT_ATTR_RO(dev_name); @@ -1082,17 +1005,17 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_inst( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiInstIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(inst); @@ -1107,16 +1030,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_dev( struct se_lun *lun; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } - lun = deve->se_lun; + lun = rcu_dereference(deve->se_lun); /* scsiDeviceIndex */ - ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev); @@ -1131,16 +1054,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_port( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiAuthIntrTgtPortIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(port); @@ -1154,15 +1077,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(indx); @@ -1176,15 +1099,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_dev_or_port( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrDevOrPort */ ret = snprintf(page, PAGE_SIZE, "%u\n", 1); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev_or_port); @@ -1198,15 +1121,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_intr_name( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrName */ ret = snprintf(page, PAGE_SIZE, "%s\n", nacl->initiatorname); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(intr_name); @@ -1220,15 +1143,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_map_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrLunMapIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(map_indx); @@ -1242,15 +1165,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_att_count( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrAttachedTimes */ ret = snprintf(page, PAGE_SIZE, "%u\n", deve->attach_count); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(att_count); @@ -1264,15 +1187,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_num_cmds( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrOutCommands */ - ret = snprintf(page, PAGE_SIZE, "%u\n", deve->total_cmds); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&deve->total_cmds)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(num_cmds); @@ -1286,15 +1210,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_read_mbytes( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrReadMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->read_bytes >> 20)); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&deve->read_bytes) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(read_mbytes); @@ -1308,15 +1233,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_write_mbytes( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrWrittenMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->write_bytes >> 20)); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", + (u32)(atomic_long_read(&deve->write_bytes) >> 20)); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(write_mbytes); @@ -1330,15 +1256,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_hs_num_cmds( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrHSOutCommands */ ret = snprintf(page, PAGE_SIZE, "%u\n", 0); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(hs_num_cmds); @@ -1352,16 +1278,16 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_creation_time( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAuthIntrLastCreation */ ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(((u32)deve->creation_time - INITIAL_JIFFIES) * 100 / HZ)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(creation_time); @@ -1375,15 +1301,15 @@ static ssize_t target_stat_scsi_auth_intr_show_attr_row_status( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* FIXME: scsiAuthIntrRowStatus */ ret = snprintf(page, PAGE_SIZE, "Ready\n"); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(row_status); @@ -1448,17 +1374,17 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_inst( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiInstIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_inst_index(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(inst); @@ -1473,16 +1399,16 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_dev( struct se_lun *lun; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } - lun = deve->se_lun; + lun = rcu_dereference(deve->se_lun); /* scsiDeviceIndex */ - ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index); - spin_unlock_irq(&nacl->device_list_lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_index); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(dev); @@ -1497,16 +1423,16 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_port( struct se_portal_group *tpg; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } tpg = nacl->se_tpg; /* scsiPortIndex */ ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port); @@ -1546,15 +1472,15 @@ static ssize_t target_stat_scsi_att_intr_port_show_attr_port_auth_indx( struct se_dev_entry *deve; ssize_t ret; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[lacl->mapped_lun]; - if (!deve->se_lun || !deve->se_lun_acl) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); + if (!deve) { + rcu_read_unlock(); return -ENODEV; } /* scsiAttIntrPortAuthIntrIdx */ ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return ret; } DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port_auth_indx); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index a5bb0c46e57e..5b2820312310 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -31,7 +31,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_alua.h" @@ -115,7 +114,7 @@ void core_tmr_abort_task( { struct se_cmd *se_cmd; unsigned long flags; - int ref_tag; + u64 ref_tag; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { @@ -127,16 +126,17 @@ void core_tmr_abort_task( if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) continue; - ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd); + ref_tag = se_cmd->tag; if (tmr->ref_task_tag != ref_tag) continue; - printk("ABORT_TASK: Found referenced %s task_tag: %u\n", + printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); spin_lock(&se_cmd->t_state_lock); if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %u already complete, skipping\n", ref_tag); + printk("ABORT_TASK: ref_tag: %llu already complete," + " skipping\n", ref_tag); spin_unlock(&se_cmd->t_state_lock); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); goto out; @@ -151,18 +151,18 @@ void core_tmr_abort_task( cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" - " ref_tag: %d\n", ref_tag); + " ref_tag: %llu\n", ref_tag); tmr->response = TMR_FUNCTION_COMPLETE; return; } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); out: - printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %d\n", + printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; } @@ -287,16 +287,16 @@ static void core_tmr_drain_state_list( list_del(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" - " ITT/CmdSN: 0x%08x/0x%08x, i_state: %d, t_state: %d" + " ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d" "cdb: 0x%02x\n", (preempt_and_abort_list) ? "Preempt" : "", cmd, - cmd->se_tfo->get_task_tag(cmd), 0, + cmd->tag, 0, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state, cmd->t_task_cdb[0]); - pr_debug("LUN_RESET: ITT[0x%08x] - pr_res_key: 0x%016Lx" + pr_debug("LUN_RESET: ITT[0x%08llx] - pr_res_key: 0x%016Lx" " -- CMD_T_ACTIVE: %d" " CMD_T_STOP: %d CMD_T_SENT: %d\n", - cmd->se_tfo->get_task_tag(cmd), cmd->pr_res_key, + cmd->tag, cmd->pr_res_key, (cmd->transport_state & CMD_T_ACTIVE) != 0, (cmd->transport_state & CMD_T_STOP) != 0, (cmd->transport_state & CMD_T_SENT) != 0); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 84de757bd458..babde4ad841f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -39,6 +39,7 @@ #include <target/target_core_fabric.h> #include "target_core_internal.h" +#include "target_core_alua.h" #include "target_core_pr.h" extern struct se_device *g_lun0_dev; @@ -46,45 +47,9 @@ extern struct se_device *g_lun0_dev; static DEFINE_SPINLOCK(tpg_lock); static LIST_HEAD(tpg_list); -/* core_clear_initiator_node_from_tpg(): - * - * - */ -static void core_clear_initiator_node_from_tpg( - struct se_node_acl *nacl, - struct se_portal_group *tpg) -{ - int i; - struct se_dev_entry *deve; - struct se_lun *lun; - - spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)) - continue; - - if (!deve->se_lun) { - pr_err("%s device entries device pointer is" - " NULL, but Initiator has access.\n", - tpg->se_tpg_tfo->get_fabric_name()); - continue; - } - - lun = deve->se_lun; - spin_unlock_irq(&nacl->device_list_lock); - core_disable_device_list_for_node(lun, NULL, deve->mapped_lun, - TRANSPORT_LUNFLAGS_NO_ACCESS, nacl, tpg); - - spin_lock_irq(&nacl->device_list_lock); - } - spin_unlock_irq(&nacl->device_list_lock); -} - /* __core_tpg_get_initiator_node_acl(): * - * spin_lock_bh(&tpg->acl_node_lock); must be held when calling + * mutex_lock(&tpg->acl_node_mutex); must be held when calling */ struct se_node_acl *__core_tpg_get_initiator_node_acl( struct se_portal_group *tpg, @@ -110,9 +75,9 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( { struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return acl; } @@ -124,22 +89,20 @@ EXPORT_SYMBOL(core_tpg_get_initiator_node_acl); */ void core_tpg_add_node_to_devs( struct se_node_acl *acl, - struct se_portal_group *tpg) + struct se_portal_group *tpg, + struct se_lun *lun_orig) { - int i = 0; u32 lun_access = 0; struct se_lun *lun; struct se_device *dev; - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = tpg->tpg_lun_list[i]; - if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) + mutex_lock(&tpg->tpg_lun_mutex); + hlist_for_each_entry_rcu(lun, &tpg->tpg_lun_hlist, link) { + if (lun_orig && lun != lun_orig) continue; - spin_unlock(&tpg->tpg_lun_lock); - - dev = lun->lun_se_dev; + dev = rcu_dereference_check(lun->lun_se_dev, + lockdep_is_held(&tpg->tpg_lun_mutex)); /* * By default in LIO-Target $FABRIC_MOD, * demo_mode_write_protect is ON, or READ_ONLY; @@ -157,7 +120,7 @@ void core_tpg_add_node_to_devs( lun_access = TRANSPORT_LUNFLAGS_READ_WRITE; } - pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%u] - Adding %s" + pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s" " access for LUN in Demo Mode\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, @@ -165,7 +128,7 @@ void core_tpg_add_node_to_devs( "READ-WRITE" : "READ-ONLY"); core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun, - lun_access, acl, tpg); + lun_access, acl, tpg); /* * Check to see if there are any existing persistent reservation * APTPL pre-registrations that need to be enabled for this dynamic @@ -173,9 +136,8 @@ void core_tpg_add_node_to_devs( */ core_scsi3_check_aptpl_registration(dev, tpg, lun, acl, lun->unpacked_lun); - spin_lock(&tpg->tpg_lun_lock); } - spin_unlock(&tpg->tpg_lun_lock); + mutex_unlock(&tpg->tpg_lun_mutex); } /* core_set_queue_depth_for_node(): @@ -196,67 +158,63 @@ static int core_set_queue_depth_for_node( return 0; } -void array_free(void *array, int n) +static struct se_node_acl *target_alloc_node_acl(struct se_portal_group *tpg, + const unsigned char *initiatorname) { - void **a = array; - int i; + struct se_node_acl *acl; - for (i = 0; i < n; i++) - kfree(a[i]); - kfree(a); -} + acl = kzalloc(max(sizeof(*acl), tpg->se_tpg_tfo->node_acl_size), + GFP_KERNEL); + if (!acl) + return NULL; -static void *array_zalloc(int n, size_t size, gfp_t flags) -{ - void **a; - int i; + INIT_LIST_HEAD(&acl->acl_list); + INIT_LIST_HEAD(&acl->acl_sess_list); + INIT_HLIST_HEAD(&acl->lun_entry_hlist); + kref_init(&acl->acl_kref); + init_completion(&acl->acl_free_comp); + spin_lock_init(&acl->nacl_sess_lock); + mutex_init(&acl->lun_entry_mutex); + atomic_set(&acl->acl_pr_ref_count, 0); + if (tpg->se_tpg_tfo->tpg_get_default_depth) + acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); + else + acl->queue_depth = 1; + snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); + acl->se_tpg = tpg; + acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - a = kzalloc(n * sizeof(void*), flags); - if (!a) - return NULL; - for (i = 0; i < n; i++) { - a[i] = kzalloc(size, flags); - if (!a[i]) { - array_free(a, n); - return NULL; - } - } - return a; + tpg->se_tpg_tfo->set_default_node_attributes(acl); + + if (core_set_queue_depth_for_node(tpg, acl) < 0) + goto out_free_acl; + + return acl; + +out_free_acl: + kfree(acl); + return NULL; } -/* core_create_device_list_for_node(): - * - * - */ -static int core_create_device_list_for_node(struct se_node_acl *nacl) +static void target_add_node_acl(struct se_node_acl *acl) { - struct se_dev_entry *deve; - int i; - - nacl->device_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG, - sizeof(struct se_dev_entry), GFP_KERNEL); - if (!nacl->device_list) { - pr_err("Unable to allocate memory for" - " struct se_node_acl->device_list\n"); - return -ENOMEM; - } - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - atomic_set(&deve->ua_count, 0); - atomic_set(&deve->pr_ref_count, 0); - spin_lock_init(&deve->ua_lock); - INIT_LIST_HEAD(&deve->alua_port_list); - INIT_LIST_HEAD(&deve->ua_list); - } + struct se_portal_group *tpg = acl->se_tpg; - return 0; + mutex_lock(&tpg->acl_node_mutex); + list_add_tail(&acl->acl_list, &tpg->acl_node_list); + tpg->num_node_acls++; + mutex_unlock(&tpg->acl_node_mutex); + + pr_debug("%s_TPG[%hu] - Added %s ACL with TCQ Depth: %d for %s" + " Initiator Node: %s\n", + tpg->se_tpg_tfo->get_fabric_name(), + tpg->se_tpg_tfo->tpg_get_tag(tpg), + acl->dynamic_node_acl ? "DYNAMIC" : "", + acl->queue_depth, + tpg->se_tpg_tfo->get_fabric_name(), + acl->initiatorname); } -/* core_tpg_check_initiator_node_acl() - * - * - */ struct se_node_acl *core_tpg_check_initiator_node_acl( struct se_portal_group *tpg, unsigned char *initiatorname) @@ -270,35 +228,11 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( if (!tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) return NULL; - acl = tpg->se_tpg_tfo->tpg_alloc_fabric_acl(tpg); + acl = target_alloc_node_acl(tpg, initiatorname); if (!acl) return NULL; - - INIT_LIST_HEAD(&acl->acl_list); - INIT_LIST_HEAD(&acl->acl_sess_list); - kref_init(&acl->acl_kref); - init_completion(&acl->acl_free_comp); - spin_lock_init(&acl->device_list_lock); - spin_lock_init(&acl->nacl_sess_lock); - atomic_set(&acl->acl_pr_ref_count, 0); - acl->queue_depth = tpg->se_tpg_tfo->tpg_get_default_depth(tpg); - snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); - acl->se_tpg = tpg; - acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); acl->dynamic_node_acl = 1; - tpg->se_tpg_tfo->set_default_node_attributes(acl); - - if (core_create_device_list_for_node(acl) < 0) { - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); - return NULL; - } - - if (core_set_queue_depth_for_node(tpg, acl) < 0) { - core_free_device_list_for_node(acl, tpg); - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); - return NULL; - } /* * Here we only create demo-mode MappedLUNs from the active * TPG LUNs if the fabric is not explicitly asking for @@ -306,18 +240,9 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( */ if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only == NULL) || (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) != 1)) - core_tpg_add_node_to_devs(acl, tpg); - - spin_lock_irq(&tpg->acl_node_lock); - list_add_tail(&acl->acl_list, &tpg->acl_node_list); - tpg->num_node_acls++; - spin_unlock_irq(&tpg->acl_node_lock); - - pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s" - " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), initiatorname); + core_tpg_add_node_to_devs(acl, tpg, NULL); + target_add_node_acl(acl); return acl; } EXPORT_SYMBOL(core_tpg_check_initiator_node_acl); @@ -328,40 +253,13 @@ void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *nacl) cpu_relax(); } -void core_tpg_clear_object_luns(struct se_portal_group *tpg) -{ - int i; - struct se_lun *lun; - - spin_lock(&tpg->tpg_lun_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = tpg->tpg_lun_list[i]; - - if ((lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) || - (lun->lun_se_dev == NULL)) - continue; - - spin_unlock(&tpg->tpg_lun_lock); - core_dev_del_lun(tpg, lun); - spin_lock(&tpg->tpg_lun_lock); - } - spin_unlock(&tpg->tpg_lun_lock); -} -EXPORT_SYMBOL(core_tpg_clear_object_luns); - -/* core_tpg_add_initiator_node_acl(): - * - * - */ struct se_node_acl *core_tpg_add_initiator_node_acl( struct se_portal_group *tpg, - struct se_node_acl *se_nacl, - const char *initiatorname, - u32 queue_depth) + const char *initiatorname) { - struct se_node_acl *acl = NULL; + struct se_node_acl *acl; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (acl) { if (acl->dynamic_node_acl) { @@ -369,99 +267,42 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( pr_debug("%s_TPG[%u] - Replacing dynamic ACL" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); - spin_unlock_irq(&tpg->acl_node_lock); - /* - * Release the locally allocated struct se_node_acl - * because * core_tpg_add_initiator_node_acl() returned - * a pointer to an existing demo mode node ACL. - */ - if (se_nacl) - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, - se_nacl); - goto done; + mutex_unlock(&tpg->acl_node_mutex); + return acl; } pr_err("ACL entry for %s Initiator" " Node %s already exists for TPG %u, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return ERR_PTR(-EEXIST); } - spin_unlock_irq(&tpg->acl_node_lock); - - if (!se_nacl) { - pr_err("struct se_node_acl pointer is NULL\n"); - return ERR_PTR(-EINVAL); - } - /* - * For v4.x logic the se_node_acl_s is hanging off a fabric - * dependent structure allocated via - * struct target_core_fabric_ops->fabric_make_nodeacl() - */ - acl = se_nacl; + mutex_unlock(&tpg->acl_node_mutex); - INIT_LIST_HEAD(&acl->acl_list); - INIT_LIST_HEAD(&acl->acl_sess_list); - kref_init(&acl->acl_kref); - init_completion(&acl->acl_free_comp); - spin_lock_init(&acl->device_list_lock); - spin_lock_init(&acl->nacl_sess_lock); - atomic_set(&acl->acl_pr_ref_count, 0); - acl->queue_depth = queue_depth; - snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); - acl->se_tpg = tpg; - acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - - tpg->se_tpg_tfo->set_default_node_attributes(acl); - - if (core_create_device_list_for_node(acl) < 0) { - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); + acl = target_alloc_node_acl(tpg, initiatorname); + if (!acl) return ERR_PTR(-ENOMEM); - } - - if (core_set_queue_depth_for_node(tpg, acl) < 0) { - core_free_device_list_for_node(acl, tpg); - tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); - return ERR_PTR(-EINVAL); - } - - spin_lock_irq(&tpg->acl_node_lock); - list_add_tail(&acl->acl_list, &tpg->acl_node_list); - tpg->num_node_acls++; - spin_unlock_irq(&tpg->acl_node_lock); - -done: - pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s" - " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, - tpg->se_tpg_tfo->get_fabric_name(), initiatorname); + target_add_node_acl(acl); return acl; } -EXPORT_SYMBOL(core_tpg_add_initiator_node_acl); -/* core_tpg_del_initiator_node_acl(): - * - * - */ -int core_tpg_del_initiator_node_acl( - struct se_portal_group *tpg, - struct se_node_acl *acl, - int force) +void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) { + struct se_portal_group *tpg = acl->se_tpg; LIST_HEAD(sess_list); struct se_session *sess, *sess_tmp; unsigned long flags; int rc; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; } list_del(&acl->acl_list); tpg->num_node_acls--; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); spin_lock_irqsave(&acl->nacl_sess_lock, flags); acl->acl_stop = 1; @@ -493,7 +334,6 @@ int core_tpg_del_initiator_node_acl( wait_for_completion(&acl->acl_free_comp); core_tpg_wait_for_nacl_pr_ref(acl); - core_clear_initiator_node_from_tpg(acl, tpg); core_free_device_list_for_node(acl, tpg); pr_debug("%s_TPG[%hu] - Deleted ACL with TCQ Depth: %d for %s" @@ -501,9 +341,8 @@ int core_tpg_del_initiator_node_acl( tpg->se_tpg_tfo->tpg_get_tag(tpg), acl->queue_depth, tpg->se_tpg_tfo->get_fabric_name(), acl->initiatorname); - return 0; + kfree(acl); } -EXPORT_SYMBOL(core_tpg_del_initiator_node_acl); /* core_tpg_set_initiator_node_queue_depth(): * @@ -520,21 +359,21 @@ int core_tpg_set_initiator_node_queue_depth( unsigned long flags; int dynamic_acl = 0; - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (!acl) { pr_err("Access Control List entry for %s Initiator" " Node %s does not exists for TPG %hu, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -ENODEV; } if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); spin_lock_irqsave(&tpg->session_lock, flags); list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) { @@ -550,10 +389,10 @@ int core_tpg_set_initiator_node_queue_depth( tpg->se_tpg_tfo->get_fabric_name(), initiatorname); spin_unlock_irqrestore(&tpg->session_lock, flags); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -EEXIST; } /* @@ -588,10 +427,10 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return -EINVAL; } spin_unlock_irqrestore(&tpg->session_lock, flags); @@ -607,10 +446,10 @@ int core_tpg_set_initiator_node_queue_depth( initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_lock_irq(&tpg->acl_node_lock); + mutex_lock(&tpg->acl_node_mutex); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_irq(&tpg->acl_node_lock); + mutex_unlock(&tpg->acl_node_mutex); return 0; } @@ -646,78 +485,54 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) complete(&lun->lun_ref_comp); } -static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) -{ - /* Set in core_dev_setup_virtual_lun0() */ - struct se_device *dev = g_lun0_dev; - struct se_lun *lun = &se_tpg->tpg_virt_lun0; - u32 lun_access = TRANSPORT_LUNFLAGS_READ_ONLY; - int ret; - - lun->unpacked_lun = 0; - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; - atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_acl_list); - spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_sep_lock); - init_completion(&lun->lun_ref_comp); - - ret = core_tpg_add_lun(se_tpg, lun, lun_access, dev); - if (ret < 0) - return ret; - - return 0; -} - int core_tpg_register( - const struct target_core_fabric_ops *tfo, struct se_wwn *se_wwn, struct se_portal_group *se_tpg, - void *tpg_fabric_ptr, - int se_tpg_type) + int proto_id) { - struct se_lun *lun; - u32 i; - - se_tpg->tpg_lun_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG, - sizeof(struct se_lun), GFP_KERNEL); - if (!se_tpg->tpg_lun_list) { - pr_err("Unable to allocate struct se_portal_group->" - "tpg_lun_list\n"); - return -ENOMEM; - } + int ret; + + if (!se_tpg) + return -EINVAL; + /* + * For the typical case where core_tpg_register() is called by a + * fabric driver from target_core_fabric_ops->fabric_make_tpg() + * configfs context, use the original tf_ops pointer already saved + * by target-core in target_fabric_make_wwn(). + * + * Otherwise, for special cases like iscsi-target discovery TPGs + * the caller is responsible for setting ->se_tpg_tfo ahead of + * calling core_tpg_register(). + */ + if (se_wwn) + se_tpg->se_tpg_tfo = se_wwn->wwn_tf->tf_ops; - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - lun = se_tpg->tpg_lun_list[i]; - lun->unpacked_lun = i; - lun->lun_link_magic = SE_LUN_LINK_MAGIC; - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; - atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_acl_list); - spin_lock_init(&lun->lun_acl_lock); - spin_lock_init(&lun->lun_sep_lock); - init_completion(&lun->lun_ref_comp); + if (!se_tpg->se_tpg_tfo) { + pr_err("Unable to locate se_tpg->se_tpg_tfo pointer\n"); + return -EINVAL; } - se_tpg->se_tpg_type = se_tpg_type; - se_tpg->se_tpg_fabric_ptr = tpg_fabric_ptr; - se_tpg->se_tpg_tfo = tfo; + INIT_HLIST_HEAD(&se_tpg->tpg_lun_hlist); + se_tpg->proto_id = proto_id; se_tpg->se_tpg_wwn = se_wwn; atomic_set(&se_tpg->tpg_pr_ref_count, 0); INIT_LIST_HEAD(&se_tpg->acl_node_list); INIT_LIST_HEAD(&se_tpg->se_tpg_node); INIT_LIST_HEAD(&se_tpg->tpg_sess_list); - spin_lock_init(&se_tpg->acl_node_lock); spin_lock_init(&se_tpg->session_lock); - spin_lock_init(&se_tpg->tpg_lun_lock); - - if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) { - if (core_tpg_setup_virtual_lun0(se_tpg) < 0) { - array_free(se_tpg->tpg_lun_list, - TRANSPORT_MAX_LUNS_PER_TPG); - return -ENOMEM; + mutex_init(&se_tpg->tpg_lun_mutex); + mutex_init(&se_tpg->acl_node_mutex); + + if (se_tpg->proto_id >= 0) { + se_tpg->tpg_virt_lun0 = core_tpg_alloc_lun(se_tpg, 0); + if (IS_ERR(se_tpg->tpg_virt_lun0)) + return PTR_ERR(se_tpg->tpg_virt_lun0); + + ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0, + TRANSPORT_LUNFLAGS_READ_ONLY, g_lun0_dev); + if (ret < 0) { + kfree(se_tpg->tpg_virt_lun0); + return ret; } } @@ -725,11 +540,11 @@ int core_tpg_register( list_add_tail(&se_tpg->se_tpg_node, &tpg_list); spin_unlock_bh(&tpg_lock); - pr_debug("TARGET_CORE[%s]: Allocated %s struct se_portal_group for" - " endpoint: %s, Portal Tag: %u\n", tfo->get_fabric_name(), - (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ? - "Normal" : "Discovery", (tfo->tpg_get_wwn(se_tpg) == NULL) ? - "None" : tfo->tpg_get_wwn(se_tpg), tfo->tpg_get_tag(se_tpg)); + pr_debug("TARGET_CORE[%s]: Allocated portal_group for endpoint: %s, " + "Proto: %d, Portal Tag: %u\n", se_tpg->se_tpg_tfo->get_fabric_name(), + se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) ? + se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg) : NULL, + se_tpg->proto_id, se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); return 0; } @@ -737,14 +552,14 @@ EXPORT_SYMBOL(core_tpg_register); int core_tpg_deregister(struct se_portal_group *se_tpg) { + const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo; struct se_node_acl *nacl, *nacl_tmp; + LIST_HEAD(node_list); - pr_debug("TARGET_CORE[%s]: Deallocating %s struct se_portal_group" - " for endpoint: %s Portal Tag %u\n", - (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ? - "Normal" : "Discovery", se_tpg->se_tpg_tfo->get_fabric_name(), - se_tpg->se_tpg_tfo->tpg_get_wwn(se_tpg), - se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); + pr_debug("TARGET_CORE[%s]: Deallocating portal_group for endpoint: %s, " + "Proto: %d, Portal Tag: %u\n", tfo->get_fabric_name(), + tfo->tpg_get_wwn(se_tpg) ? tfo->tpg_get_wwn(se_tpg) : NULL, + se_tpg->proto_id, tfo->tpg_get_tag(se_tpg)); spin_lock_bh(&tpg_lock); list_del(&se_tpg->se_tpg_node); @@ -752,61 +567,56 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0) cpu_relax(); + + mutex_lock(&se_tpg->acl_node_mutex); + list_splice_init(&se_tpg->acl_node_list, &node_list); + mutex_unlock(&se_tpg->acl_node_mutex); /* * Release any remaining demo-mode generated se_node_acl that have * not been released because of TFO->tpg_check_demo_mode_cache() == 1 * in transport_deregister_session(). */ - spin_lock_irq(&se_tpg->acl_node_lock); - list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list, - acl_list) { + list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) { list_del(&nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_irq(&se_tpg->acl_node_lock); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); - se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl); - - spin_lock_irq(&se_tpg->acl_node_lock); + kfree(nacl); } - spin_unlock_irq(&se_tpg->acl_node_lock); - if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) - core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0); + if (se_tpg->proto_id >= 0) { + core_tpg_remove_lun(se_tpg, se_tpg->tpg_virt_lun0); + kfree_rcu(se_tpg->tpg_virt_lun0, rcu_head); + } - se_tpg->se_tpg_fabric_ptr = NULL; - array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG); return 0; } EXPORT_SYMBOL(core_tpg_deregister); struct se_lun *core_tpg_alloc_lun( struct se_portal_group *tpg, - u32 unpacked_lun) + u64 unpacked_lun) { struct se_lun *lun; - if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { - pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER_TPG" - "-1: %u for Target Portal Group: %u\n", - tpg->se_tpg_tfo->get_fabric_name(), - unpacked_lun, TRANSPORT_MAX_LUNS_PER_TPG-1, - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - return ERR_PTR(-EOVERFLOW); - } - - spin_lock(&tpg->tpg_lun_lock); - lun = tpg->tpg_lun_list[unpacked_lun]; - if (lun->lun_status == TRANSPORT_LUN_STATUS_ACTIVE) { - pr_err("TPG Logical Unit Number: %u is already active" - " on %s Target Portal Group: %u, ignoring request.\n", - unpacked_lun, tpg->se_tpg_tfo->get_fabric_name(), - tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock(&tpg->tpg_lun_lock); - return ERR_PTR(-EINVAL); + lun = kzalloc(sizeof(*lun), GFP_KERNEL); + if (!lun) { + pr_err("Unable to allocate se_lun memory\n"); + return ERR_PTR(-ENOMEM); } - spin_unlock(&tpg->tpg_lun_lock); + lun->unpacked_lun = unpacked_lun; + lun->lun_link_magic = SE_LUN_LINK_MAGIC; + atomic_set(&lun->lun_acl_count, 0); + init_completion(&lun->lun_ref_comp); + INIT_LIST_HEAD(&lun->lun_deve_list); + INIT_LIST_HEAD(&lun->lun_dev_link); + atomic_set(&lun->lun_tg_pt_secondary_offline, 0); + spin_lock_init(&lun->lun_deve_lock); + mutex_init(&lun->lun_tg_pt_md_mutex); + INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link); + spin_lock_init(&lun->lun_tg_pt_gp_lock); + lun->lun_tpg = tpg; return lun; } @@ -822,34 +632,70 @@ int core_tpg_add_lun( ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, 0, GFP_KERNEL); if (ret < 0) - return ret; + goto out; - ret = core_dev_export(dev, tpg, lun); - if (ret < 0) { - percpu_ref_exit(&lun->lun_ref); - return ret; - } + ret = core_alloc_rtpi(lun, dev); + if (ret) + goto out_kill_ref; + + if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); + + mutex_lock(&tpg->tpg_lun_mutex); + + spin_lock(&dev->se_port_lock); + lun->lun_index = dev->dev_index; + rcu_assign_pointer(lun->lun_se_dev, dev); + dev->export_count++; + list_add_tail(&lun->lun_dev_link, &dev->dev_sep_list); + spin_unlock(&dev->se_port_lock); - spin_lock(&tpg->tpg_lun_lock); lun->lun_access = lun_access; - lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE; - spin_unlock(&tpg->tpg_lun_lock); + if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist); + mutex_unlock(&tpg->tpg_lun_mutex); return 0; + +out_kill_ref: + percpu_ref_exit(&lun->lun_ref); +out: + return ret; } void core_tpg_remove_lun( struct se_portal_group *tpg, struct se_lun *lun) { + /* + * rcu_dereference_raw protected by se_lun->lun_group symlink + * reference to se_device->dev_group. + */ + struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); + core_clear_lun_from_tpg(lun, tpg); + /* + * Wait for any active I/O references to percpu se_lun->lun_ref to + * be released. Also, se_lun->lun_ref is now used by PR and ALUA + * logic when referencing a remote target port during ALL_TGT_PT=1 + * and generating UNIT_ATTENTIONs for ALUA access state transition. + */ transport_clear_lun_ref(lun); - core_dev_unexport(lun->lun_se_dev, tpg, lun); + mutex_lock(&tpg->tpg_lun_mutex); + if (lun->lun_se_dev) { + target_detach_tg_pt_gp(lun); - spin_lock(&tpg->tpg_lun_lock); - lun->lun_status = TRANSPORT_LUN_STATUS_FREE; - spin_unlock(&tpg->tpg_lun_lock); + spin_lock(&dev->se_port_lock); + list_del(&lun->lun_dev_link); + dev->export_count--; + rcu_assign_pointer(lun->lun_se_dev, NULL); + spin_unlock(&dev->se_port_lock); + } + if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) + hlist_del_rcu(&lun->link); + mutex_unlock(&tpg->tpg_lun_mutex); percpu_ref_exit(&lun->lun_ref); } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index cd3bfc16d25f..ce8574b7220c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -43,7 +43,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_alua.h" @@ -60,7 +59,6 @@ struct kmem_cache *t10_pr_reg_cache; struct kmem_cache *t10_alua_lu_gp_cache; struct kmem_cache *t10_alua_lu_gp_mem_cache; struct kmem_cache *t10_alua_tg_pt_gp_cache; -struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; struct kmem_cache *t10_alua_lba_map_cache; struct kmem_cache *t10_alua_lba_map_mem_cache; @@ -119,16 +117,6 @@ int init_se_kmem_caches(void) "cache failed\n"); goto out_free_lu_gp_mem_cache; } - t10_alua_tg_pt_gp_mem_cache = kmem_cache_create( - "t10_alua_tg_pt_gp_mem_cache", - sizeof(struct t10_alua_tg_pt_gp_member), - __alignof__(struct t10_alua_tg_pt_gp_member), - 0, NULL); - if (!t10_alua_tg_pt_gp_mem_cache) { - pr_err("kmem_cache_create() for t10_alua_tg_pt_gp_" - "mem_t failed\n"); - goto out_free_tg_pt_gp_cache; - } t10_alua_lba_map_cache = kmem_cache_create( "t10_alua_lba_map_cache", sizeof(struct t10_alua_lba_map), @@ -136,7 +124,7 @@ int init_se_kmem_caches(void) if (!t10_alua_lba_map_cache) { pr_err("kmem_cache_create() for t10_alua_lba_map_" "cache failed\n"); - goto out_free_tg_pt_gp_mem_cache; + goto out_free_tg_pt_gp_cache; } t10_alua_lba_map_mem_cache = kmem_cache_create( "t10_alua_lba_map_mem_cache", @@ -159,8 +147,6 @@ out_free_lba_map_mem_cache: kmem_cache_destroy(t10_alua_lba_map_mem_cache); out_free_lba_map_cache: kmem_cache_destroy(t10_alua_lba_map_cache); -out_free_tg_pt_gp_mem_cache: - kmem_cache_destroy(t10_alua_tg_pt_gp_mem_cache); out_free_tg_pt_gp_cache: kmem_cache_destroy(t10_alua_tg_pt_gp_cache); out_free_lu_gp_mem_cache: @@ -186,7 +172,6 @@ void release_se_kmem_caches(void) kmem_cache_destroy(t10_alua_lu_gp_cache); kmem_cache_destroy(t10_alua_lu_gp_mem_cache); kmem_cache_destroy(t10_alua_tg_pt_gp_cache); - kmem_cache_destroy(t10_alua_tg_pt_gp_mem_cache); kmem_cache_destroy(t10_alua_lba_map_cache); kmem_cache_destroy(t10_alua_lba_map_mem_cache); } @@ -406,12 +391,6 @@ EXPORT_SYMBOL(target_get_session); void target_put_session(struct se_session *se_sess) { - struct se_portal_group *tpg = se_sess->se_tpg; - - if (tpg->se_tpg_tfo->put_session != NULL) { - tpg->se_tpg_tfo->put_session(se_sess); - return; - } kref_put(&se_sess->sess_kref, target_release_session); } EXPORT_SYMBOL(target_put_session); @@ -498,7 +477,7 @@ void transport_deregister_session(struct se_session *se_sess) const struct target_core_fabric_ops *se_tfo; struct se_node_acl *se_nacl; unsigned long flags; - bool comp_nacl = true; + bool comp_nacl = true, drop_nacl = false; if (!se_tpg) { transport_free_session(se_sess); @@ -518,22 +497,22 @@ void transport_deregister_session(struct se_session *se_sess) */ se_nacl = se_sess->se_node_acl; - spin_lock_irqsave(&se_tpg->acl_node_lock, flags); + mutex_lock(&se_tpg->acl_node_mutex); if (se_nacl && se_nacl->dynamic_node_acl) { if (!se_tfo->tpg_check_demo_mode_cache(se_tpg)) { list_del(&se_nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); - core_tpg_wait_for_nacl_pr_ref(se_nacl); - core_free_device_list_for_node(se_nacl, se_tpg); - se_tfo->tpg_release_fabric_acl(se_tpg, se_nacl); - - comp_nacl = false; - spin_lock_irqsave(&se_tpg->acl_node_lock, flags); + drop_nacl = true; } } - spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); + mutex_unlock(&se_tpg->acl_node_mutex); + if (drop_nacl) { + core_tpg_wait_for_nacl_pr_ref(se_nacl); + core_free_device_list_for_node(se_nacl, se_tpg); + kfree(se_nacl); + comp_nacl = false; + } pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n", se_tpg->se_tpg_tfo->get_fabric_name()); /* @@ -593,9 +572,8 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, * this command for frontend exceptions. */ if (cmd->transport_state & CMD_T_STOP) { - pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", - __func__, __LINE__, - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -1148,6 +1126,8 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) /* * Used by fabric modules containing a local struct se_cmd within their * fabric dependent per I/O descriptor. + * + * Preserves the value of @cmd->tag. */ void transport_init_se_cmd( struct se_cmd *cmd, @@ -1274,11 +1254,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) return ret; cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE; - - spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) - cmd->se_lun->lun_sep->sep_stats.cmd_pdus++; - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_inc(&cmd->se_lun->lun_stats.cmd_pdus); return 0; } EXPORT_SYMBOL(target_setup_cmd_from_cdb); @@ -1346,11 +1322,9 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, cmd->t_data_sg = sgl; cmd->t_data_nents = sgl_count; + cmd->t_bidi_data_sg = sgl_bidi; + cmd->t_bidi_data_nents = sgl_bidi_count; - if (sgl_bidi && sgl_bidi_count) { - cmd->t_bidi_data_sg = sgl_bidi; - cmd->t_bidi_data_nents = sgl_bidi_count; - } cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; return 0; } @@ -1375,6 +1349,8 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, * @sgl_prot: struct scatterlist memory protection information * @sgl_prot_count: scatterlist count for protection information * + * Task tags are supported if the caller has set @se_cmd->tag. + * * Returns non zero to signal active I/O shutdown failure. All other * setup exceptions will be returned as a SCSI CHECK_CONDITION response, * but still return zero here. @@ -1383,7 +1359,7 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, * assumes internal allocation of fabric payload buffer by target-core. */ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, + unsigned char *cdb, unsigned char *sense, u64 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags, struct scatterlist *sgl, u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count, @@ -1412,7 +1388,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess * for fabrics using TARGET_SCF_ACK_KREF that expect a second * kref_put() to happen during fabric packet acknowledgement. */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) return ret; /* @@ -1426,7 +1402,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess rc = transport_lookup_cmd_lun(se_cmd, unpacked_lun); if (rc) { transport_send_check_condition_and_sense(se_cmd, rc, 0); - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return 0; } @@ -1443,6 +1419,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess if (sgl_prot_count) { se_cmd->t_prot_sg = sgl_prot; se_cmd->t_prot_nents = sgl_prot_count; + se_cmd->se_cmd_flags |= SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC; } /* @@ -1506,6 +1483,8 @@ EXPORT_SYMBOL(target_submit_cmd_map_sgls); * @data_dir: DMA data direction * @flags: flags for command submission from target_sc_flags_tables * + * Task tags are supported if the caller has set @se_cmd->tag. + * * Returns non zero to signal active I/O shutdown failure. All other * setup exceptions will be returned as a SCSI CHECK_CONDITION response, * but still return zero here. @@ -1516,7 +1495,7 @@ EXPORT_SYMBOL(target_submit_cmd_map_sgls); * It also assumes interal target core SGL memory allocation. */ int target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, + unsigned char *cdb, unsigned char *sense, u64 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags) { return target_submit_cmd_map_sgls(se_cmd, se_sess, cdb, sense, @@ -1553,7 +1532,7 @@ static void target_complete_tmr_failure(struct work_struct *work) **/ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *sense, u32 unpacked_lun, + unsigned char *sense, u64 unpacked_lun, void *fabric_tmr_ptr, unsigned char tm_type, gfp_t gfp, unsigned int tag, int flags) { @@ -1577,7 +1556,7 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, se_cmd->se_tmr_req->ref_task_tag = tag; /* See target_submit_cmd for commentary */ - ret = target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF)); + ret = target_get_sess_cmd(se_cmd, flags & TARGET_SCF_ACK_KREF); if (ret) { core_tmr_release_req(se_cmd->se_tmr_req); return ret; @@ -1633,9 +1612,8 @@ void transport_generic_request_failure(struct se_cmd *cmd, { int ret = 0; - pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08x" - " CDB: 0x%02x\n", cmd, cmd->se_tfo->get_task_tag(cmd), - cmd->t_task_cdb[0]); + pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx" + " CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]); pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n", cmd->se_tfo->get_cmd_state(cmd), cmd->t_state, sense_reason); @@ -1692,13 +1670,13 @@ void transport_generic_request_failure(struct se_cmd *cmd, * See spc4r17, section 7.4.6 Control Mode Page, Table 349 */ if (cmd->se_sess && - cmd->se_dev->dev_attrib.emulate_ua_intlck_ctrl == 2) - core_scsi3_ua_allocate(cmd->se_sess->se_node_acl, - cmd->orig_fe_lun, 0x2C, - ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS); - + cmd->se_dev->dev_attrib.emulate_ua_intlck_ctrl == 2) { + target_ua_allocate_lun(cmd->se_sess->se_node_acl, + cmd->orig_fe_lun, 0x2C, + ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS); + } trace_target_cmd_complete(cmd); - ret = cmd->se_tfo-> queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; goto check_stop; @@ -1759,8 +1737,8 @@ static int target_write_prot_action(struct se_cmd *cmd) break; sectors = cmd->data_length >> ilog2(cmd->se_dev->dev_attrib.block_size); - cmd->pi_err = sbc_dif_verify_write(cmd, cmd->t_task_lba, - sectors, 0, NULL, 0); + cmd->pi_err = sbc_dif_verify(cmd, cmd->t_task_lba, + sectors, 0, cmd->t_prot_sg, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); @@ -1843,9 +1821,8 @@ void target_execute_cmd(struct se_cmd *cmd) */ spin_lock_irq(&cmd->t_state_lock); if (cmd->transport_state & CMD_T_STOP) { - pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n", - __func__, __LINE__, - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); spin_unlock_irq(&cmd->t_state_lock); complete_all(&cmd->t_transport_stop_comp); @@ -1984,16 +1961,17 @@ static void transport_handle_queue_full( static bool target_read_prot_action(struct se_cmd *cmd) { - sense_reason_t rc; - switch (cmd->prot_op) { case TARGET_PROT_DIN_STRIP: if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) { - rc = sbc_dif_read_strip(cmd); - if (rc) { - cmd->pi_err = rc; + u32 sectors = cmd->data_length >> + ilog2(cmd->se_dev->dev_attrib.block_size); + + cmd->pi_err = sbc_dif_verify(cmd, cmd->t_task_lba, + sectors, 0, cmd->t_prot_sg, + 0); + if (cmd->pi_err) return true; - } } break; case TARGET_PROT_DIN_INSERT: @@ -2072,12 +2050,8 @@ static void target_complete_ok_work(struct work_struct *work) queue_rsp: switch (cmd->data_direction) { case DMA_FROM_DEVICE: - spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.tx_data_octets += - cmd->data_length; - } - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.tx_data_octets); /* * Perform READ_STRIP of PI using software emulation when * backend had PI enabled, if the transport will not be @@ -2100,22 +2074,14 @@ queue_rsp: goto queue_full; break; case DMA_TO_DEVICE: - spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.rx_data_octets += - cmd->data_length; - } - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.rx_data_octets); /* * Check if we need to send READ payload for BIDI-COMMAND */ if (cmd->se_cmd_flags & SCF_BIDI) { - spin_lock(&cmd->se_lun->lun_sep_lock); - if (cmd->se_lun->lun_sep) { - cmd->se_lun->lun_sep->sep_stats.tx_data_octets += - cmd->data_length; - } - spin_unlock(&cmd->se_lun->lun_sep_lock); + atomic_long_add(cmd->data_length, + &cmd->se_lun->lun_stats.tx_data_octets); ret = cmd->se_tfo->queue_data_in(cmd); if (ret == -EAGAIN || ret == -ENOMEM) goto queue_full; @@ -2172,6 +2138,12 @@ static inline void transport_reset_sgl_orig(struct se_cmd *cmd) static inline void transport_free_pages(struct se_cmd *cmd) { + if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { + transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); + cmd->t_prot_sg = NULL; + cmd->t_prot_nents = 0; + } + if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { /* * Release special case READ buffer payload required for @@ -2195,10 +2167,6 @@ static inline void transport_free_pages(struct se_cmd *cmd) transport_free_sgl(cmd->t_bidi_data_sg, cmd->t_bidi_data_nents); cmd->t_bidi_data_sg = NULL; cmd->t_bidi_data_nents = 0; - - transport_free_sgl(cmd->t_prot_sg, cmd->t_prot_nents); - cmd->t_prot_sg = NULL; - cmd->t_prot_nents = 0; } /** @@ -2220,7 +2188,7 @@ static int transport_release_cmd(struct se_cmd *cmd) * If this cmd has been setup with target_get_sess_cmd(), drop * the kref and call ->release_cmd() in kref callback. */ - return target_put_sess_cmd(cmd->se_sess, cmd); + return target_put_sess_cmd(cmd); } /** @@ -2337,6 +2305,14 @@ transport_generic_new_cmd(struct se_cmd *cmd) int ret = 0; bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); + if (cmd->prot_op != TARGET_PROT_NORMAL && + !(cmd->se_cmd_flags & SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC)) { + ret = target_alloc_sgl(&cmd->t_prot_sg, &cmd->t_prot_nents, + cmd->prot_length, true); + if (ret < 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + /* * Determine is the TCM fabric module has already allocated physical * memory, and is directly calling transport_generic_map_mem_to_cmd() @@ -2362,14 +2338,6 @@ transport_generic_new_cmd(struct se_cmd *cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } - if (cmd->prot_op != TARGET_PROT_NORMAL) { - ret = target_alloc_sgl(&cmd->t_prot_sg, - &cmd->t_prot_nents, - cmd->prot_length, true); - if (ret < 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } - ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, cmd->data_length, zero_flag); if (ret < 0) @@ -2464,13 +2432,12 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) EXPORT_SYMBOL(transport_generic_free_cmd); /* target_get_sess_cmd - Add command to active ->sess_cmd_list - * @se_sess: session to reference * @se_cmd: command descriptor to add * @ack_kref: Signal that fabric will perform an ack target_put_sess_cmd() */ -int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, - bool ack_kref) +int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) { + struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; int ret = 0; @@ -2492,7 +2459,7 @@ out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); if (ret && ack_kref) - target_put_sess_cmd(se_sess, se_cmd); + target_put_sess_cmd(se_cmd); return ret; } @@ -2521,11 +2488,12 @@ static void target_release_cmd_kref(struct kref *kref) } /* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_sess: session to reference * @se_cmd: command descriptor to drop */ -int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) +int target_put_sess_cmd(struct se_cmd *se_cmd) { + struct se_session *se_sess = se_cmd->se_sess; + if (!se_sess) { se_cmd->se_tfo->release_cmd(se_cmd); return 1; @@ -2591,31 +2559,10 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); -static int transport_clear_lun_ref_thread(void *p) +void transport_clear_lun_ref(struct se_lun *lun) { - struct se_lun *lun = p; - percpu_ref_kill(&lun->lun_ref); - wait_for_completion(&lun->lun_ref_comp); - complete(&lun->lun_shutdown_comp); - - return 0; -} - -int transport_clear_lun_ref(struct se_lun *lun) -{ - struct task_struct *kt; - - kt = kthread_run(transport_clear_lun_ref_thread, lun, - "tcm_cl_%u", lun->unpacked_lun); - if (IS_ERR(kt)) { - pr_err("Unable to start clear_lun thread\n"); - return PTR_ERR(kt); - } - wait_for_completion(&lun->lun_shutdown_comp); - - return 0; } /** @@ -2649,10 +2596,8 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) cmd->transport_state |= CMD_T_STOP; - pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08x" - " i_state: %d, t_state: %d, CMD_T_STOP\n", - cmd, cmd->se_tfo->get_task_tag(cmd), - cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); + pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n", + cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2661,9 +2606,8 @@ bool transport_wait_for_tasks(struct se_cmd *cmd) spin_lock_irqsave(&cmd->t_state_lock, flags); cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); - pr_debug("wait_for_tasks: Stopped wait_for_completion(" - "&cmd->t_transport_stop_comp) for ITT: 0x%08x\n", - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n", + cmd->tag); spin_unlock_irqrestore(&cmd->t_state_lock, flags); @@ -2965,8 +2909,8 @@ int transport_check_aborted_status(struct se_cmd *cmd, int send_status) if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) return 1; - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08x\n", - cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08llx\n", + cmd->t_task_cdb[0], cmd->tag); cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; cmd->scsi_status = SAM_STAT_TASK_ABORTED; @@ -3005,9 +2949,8 @@ void transport_send_task_abort(struct se_cmd *cmd) transport_lun_remove_cmd(cmd); - pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x," - " ITT: 0x%08x\n", cmd->t_task_cdb[0], - cmd->se_tfo->get_task_tag(cmd)); + pr_debug("Setting SAM_STAT_TASK_ABORTED status for CDB: 0x%02x, ITT: 0x%08llx\n", + cmd->t_task_cdb[0], cmd->tag); trace_target_cmd_complete(cmd); cmd->se_tfo->queue_status(cmd); @@ -3033,6 +2976,11 @@ static void target_tmr_work(struct work_struct *work) ret = core_tmr_lun_reset(dev, tmr, NULL, NULL); tmr->response = (!ret) ? TMR_FUNCTION_COMPLETE : TMR_FUNCTION_REJECTED; + if (tmr->response == TMR_FUNCTION_COMPLETE) { + target_ua_allocate_lun(cmd->se_sess->se_node_acl, + cmd->orig_fe_lun, 0x29, + ASCQ_29H_BUS_DEVICE_RESET_FUNCTION_OCCURRED); + } break; case TMR_TARGET_WARM_RESET: tmr->response = TMR_FUNCTION_REJECTED; @@ -3067,3 +3015,22 @@ int transport_generic_handle_tmr( return 0; } EXPORT_SYMBOL(transport_generic_handle_tmr); + +bool +target_check_wce(struct se_device *dev) +{ + bool wce = false; + + if (dev->transport->get_write_cache) + wce = dev->transport->get_write_cache(dev); + else if (dev->dev_attrib.emulate_write_cache > 0) + wce = true; + + return wce; +} + +bool +target_check_fua(struct se_device *dev) +{ + return target_check_wce(dev) && dev->dev_attrib.emulate_fua_write > 0; +} diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index e44cc94b12cb..be25eb807a5f 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -29,7 +29,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_alua.h" @@ -50,9 +49,17 @@ target_scsi3_ua_check(struct se_cmd *cmd) if (!nacl) return 0; - deve = nacl->device_list[cmd->orig_fe_lun]; - if (!atomic_read(&deve->ua_count)) + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); return 0; + } + if (!atomic_read(&deve->ua_count)) { + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); /* * From sam4r14, section 5.14 Unit attention condition: * @@ -79,18 +86,11 @@ target_scsi3_ua_check(struct se_cmd *cmd) } int core_scsi3_ua_allocate( - struct se_node_acl *nacl, - u32 unpacked_lun, + struct se_dev_entry *deve, u8 asc, u8 ascq) { - struct se_dev_entry *deve; struct se_ua *ua, *ua_p, *ua_tmp; - /* - * PASSTHROUGH OPS - */ - if (!nacl) - return -EINVAL; ua = kmem_cache_zalloc(se_ua_cache, GFP_ATOMIC); if (!ua) { @@ -99,13 +99,9 @@ int core_scsi3_ua_allocate( } INIT_LIST_HEAD(&ua->ua_nacl_list); - ua->ua_nacl = nacl; ua->ua_asc = asc; ua->ua_ascq = ascq; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[unpacked_lun]; - spin_lock(&deve->ua_lock); list_for_each_entry_safe(ua_p, ua_tmp, &deve->ua_list, ua_nacl_list) { /* @@ -113,7 +109,6 @@ int core_scsi3_ua_allocate( */ if ((ua_p->ua_asc == asc) && (ua_p->ua_ascq == ascq)) { spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); kmem_cache_free(se_ua_cache, ua); return 0; } @@ -158,24 +153,40 @@ int core_scsi3_ua_allocate( list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); atomic_inc_mb(&deve->ua_count); return 0; } list_add_tail(&ua->ua_nacl_list, &deve->ua_list); spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); - pr_debug("[%s]: Allocated UNIT ATTENTION, mapped LUN: %u, ASC:" - " 0x%02x, ASCQ: 0x%02x\n", - nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, + pr_debug("Allocated UNIT ATTENTION, mapped LUN: %llu, ASC:" + " 0x%02x, ASCQ: 0x%02x\n", deve->mapped_lun, asc, ascq); atomic_inc_mb(&deve->ua_count); return 0; } +void target_ua_allocate_lun(struct se_node_acl *nacl, + u32 unpacked_lun, u8 asc, u8 ascq) +{ + struct se_dev_entry *deve; + + if (!nacl) + return; + + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, unpacked_lun); + if (!deve) { + rcu_read_unlock(); + return; + } + + core_scsi3_ua_allocate(deve, asc, ascq); + rcu_read_unlock(); +} + void core_scsi3_ua_release_all( struct se_dev_entry *deve) { @@ -210,10 +221,14 @@ void core_scsi3_ua_for_check_condition( if (!nacl) return; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[cmd->orig_fe_lun]; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); + return; + } if (!atomic_read(&deve->ua_count)) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return; } /* @@ -249,10 +264,10 @@ void core_scsi3_ua_for_check_condition( atomic_dec_mb(&deve->ua_count); } spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); pr_debug("[%s]: %s UNIT ATTENTION condition with" - " INTLCK_CTRL: %d, mapped LUN: %u, got CDB: 0x%02x" + " INTLCK_CTRL: %d, mapped LUN: %llu, got CDB: 0x%02x" " reported ASC: 0x%02x, ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), (dev->dev_attrib.emulate_ua_intlck_ctrl != 0) ? "Reporting" : @@ -278,10 +293,14 @@ int core_scsi3_ua_clear_for_request_sense( if (!nacl) return -EINVAL; - spin_lock_irq(&nacl->device_list_lock); - deve = nacl->device_list[cmd->orig_fe_lun]; + rcu_read_lock(); + deve = target_nacl_find_deve(nacl, cmd->orig_fe_lun); + if (!deve) { + rcu_read_unlock(); + return -EINVAL; + } if (!atomic_read(&deve->ua_count)) { - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); return -EPERM; } /* @@ -307,10 +326,10 @@ int core_scsi3_ua_clear_for_request_sense( atomic_dec_mb(&deve->ua_count); } spin_unlock(&deve->ua_lock); - spin_unlock_irq(&nacl->device_list_lock); + rcu_read_unlock(); pr_debug("[%s]: Released UNIT ATTENTION condition, mapped" - " LUN: %u, got REQUEST_SENSE reported ASC: 0x%02x," + " LUN: %llu, got REQUEST_SENSE reported ASC: 0x%02x," " ASCQ: 0x%02x\n", nacl->se_tpg->se_tpg_tfo->get_fabric_name(), cmd->orig_fe_lun, *asc, *ascq); diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h index a6b56b364e7a..bd6e78ba153d 100644 --- a/drivers/target/target_core_ua.h +++ b/drivers/target/target_core_ua.h @@ -25,10 +25,14 @@ #define ASCQ_2CH_PREVIOUS_RESERVATION_CONFLICT_STATUS 0x09 +#define ASCQ_3FH_INQUIRY_DATA_HAS_CHANGED 0x03 +#define ASCQ_3FH_REPORTED_LUNS_DATA_HAS_CHANGED 0x0E + extern struct kmem_cache *se_ua_cache; extern sense_reason_t target_scsi3_ua_check(struct se_cmd *); -extern int core_scsi3_ua_allocate(struct se_node_acl *, u32, u8, u8); +extern int core_scsi3_ua_allocate(struct se_dev_entry *, u8, u8); +extern void target_ua_allocate_lun(struct se_node_acl *, u32, u8, u8); extern void core_scsi3_ua_release_all(struct se_dev_entry *); extern void core_scsi3_ua_for_check_condition(struct se_cmd *, u8 *, u8 *); extern int core_scsi3_ua_clear_for_request_sense(struct se_cmd *, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 549af9847c28..c448ef421ce7 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2013 Shaohua Li <shli@kernel.org> * Copyright (C) 2014 Red Hat, Inc. + * Copyright (C) 2015 Arrikto, Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -30,7 +31,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_backend.h> -#include <target/target_core_backend_configfs.h> #include <linux/target_core_user.h> @@ -168,6 +168,11 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->tcmu_dev = udev; tcmu_cmd->data_length = se_cmd->data_length; + if (se_cmd->se_cmd_flags & SCF_BIDI) { + BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); + tcmu_cmd->data_length += se_cmd->t_bidi_data_sg->length; + } + tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); idr_preload(GFP_KERNEL); @@ -226,9 +231,106 @@ static inline size_t head_to_end(size_t head, size_t size) #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) +static void alloc_and_scatter_data_area(struct tcmu_dev *udev, + struct scatterlist *data_sg, unsigned int data_nents, + struct iovec **iov, int *iov_cnt, bool copy_data) +{ + int i; + void *from, *to; + size_t copy_bytes; + struct scatterlist *sg; + + for_each_sg(data_sg, sg, data_nents, i) { + copy_bytes = min_t(size_t, sg->length, + head_to_end(udev->data_head, udev->data_size)); + from = kmap_atomic(sg_page(sg)) + sg->offset; + to = (void *) udev->mb_addr + udev->data_off + udev->data_head; + + if (copy_data) { + memcpy(to, from, copy_bytes); + tcmu_flush_dcache_range(to, copy_bytes); + } + + /* Even iov_base is relative to mb_addr */ + (*iov)->iov_len = copy_bytes; + (*iov)->iov_base = (void __user *) udev->data_off + + udev->data_head; + (*iov_cnt)++; + (*iov)++; + + UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); + + /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ + if (sg->length != copy_bytes) { + void *from_skip = from + copy_bytes; + + copy_bytes = sg->length - copy_bytes; + + (*iov)->iov_len = copy_bytes; + (*iov)->iov_base = (void __user *) udev->data_off + + udev->data_head; + + if (copy_data) { + to = (void *) udev->mb_addr + + udev->data_off + udev->data_head; + memcpy(to, from_skip, copy_bytes); + tcmu_flush_dcache_range(to, copy_bytes); + } + + (*iov_cnt)++; + (*iov)++; + + UPDATE_HEAD(udev->data_head, + copy_bytes, udev->data_size); + } + + kunmap_atomic(from - sg->offset); + } +} + +static void gather_and_free_data_area(struct tcmu_dev *udev, + struct scatterlist *data_sg, unsigned int data_nents) +{ + int i; + void *from, *to; + size_t copy_bytes; + struct scatterlist *sg; + + /* It'd be easier to look at entry's iovec again, but UAM */ + for_each_sg(data_sg, sg, data_nents, i) { + copy_bytes = min_t(size_t, sg->length, + head_to_end(udev->data_tail, udev->data_size)); + + to = kmap_atomic(sg_page(sg)) + sg->offset; + WARN_ON(sg->length + sg->offset > PAGE_SIZE); + from = (void *) udev->mb_addr + + udev->data_off + udev->data_tail; + tcmu_flush_dcache_range(from, copy_bytes); + memcpy(to, from, copy_bytes); + + UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); + + /* Uh oh, wrapped the data buffer for this sg's data */ + if (sg->length != copy_bytes) { + void *to_skip = to + copy_bytes; + + from = (void *) udev->mb_addr + + udev->data_off + udev->data_tail; + WARN_ON(udev->data_tail); + copy_bytes = sg->length - copy_bytes; + tcmu_flush_dcache_range(from, copy_bytes); + memcpy(to_skip, from, copy_bytes); + + UPDATE_HEAD(udev->data_tail, + copy_bytes, udev->data_size); + } + kunmap_atomic(to - sg->offset); + } +} + /* - * We can't queue a command until we have space available on the cmd ring *and* space - * space avail on the data ring. + * We can't queue a command until we have space available on the cmd ring *and* + * space available on the data ring. * * Called with ring lock held. */ @@ -276,12 +378,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) size_t base_command_size, command_size; struct tcmu_mailbox *mb; struct tcmu_cmd_entry *entry; - int i; - struct scatterlist *sg; struct iovec *iov; - int iov_cnt = 0; + int iov_cnt; uint32_t cmd_head; uint64_t cdb_off; + bool copy_to_data_area; if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) return -EINVAL; @@ -294,7 +395,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * b/c size == offsetof one-past-element. */ base_command_size = max(offsetof(struct tcmu_cmd_entry, - req.iov[se_cmd->t_data_nents + 2]), + req.iov[se_cmd->t_bidi_data_nents + + se_cmd->t_data_nents + 2]), sizeof(struct tcmu_cmd_entry)); command_size = base_command_size + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); @@ -362,53 +464,20 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * Fix up iovecs, and handle if allocation in data ring wrapped. */ iov = &entry->req.iov[0]; - for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { - size_t copy_bytes = min((size_t)sg->length, - head_to_end(udev->data_head, udev->data_size)); - void *from = kmap_atomic(sg_page(sg)) + sg->offset; - void *to = (void *) mb + udev->data_off + udev->data_head; - - if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) { - memcpy(to, from, copy_bytes); - tcmu_flush_dcache_range(to, copy_bytes); - } - - /* Even iov_base is relative to mb_addr */ - iov->iov_len = copy_bytes; - iov->iov_base = (void __user *) udev->data_off + - udev->data_head; - iov_cnt++; - iov++; - - UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); - - /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */ - if (sg->length != copy_bytes) { - from += copy_bytes; - copy_bytes = sg->length - copy_bytes; - - iov->iov_len = copy_bytes; - iov->iov_base = (void __user *) udev->data_off + - udev->data_head; - - if (se_cmd->data_direction == DMA_TO_DEVICE) { - to = (void *) mb + udev->data_off + udev->data_head; - memcpy(to, from, copy_bytes); - tcmu_flush_dcache_range(to, copy_bytes); - } - - iov_cnt++; - iov++; - - UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size); - } - - kunmap_atomic(from); - } + iov_cnt = 0; + copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE + || se_cmd->se_cmd_flags & SCF_BIDI); + alloc_and_scatter_data_area(udev, se_cmd->t_data_sg, + se_cmd->t_data_nents, &iov, &iov_cnt, copy_to_data_area); entry->req.iov_cnt = iov_cnt; - entry->req.iov_bidi_cnt = 0; entry->req.iov_dif_cnt = 0; + /* Handle BIDI commands */ + iov_cnt = 0; + alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false); + entry->req.iov_bidi_cnt = iov_cnt; + /* All offsets relative to mb_addr, not start of entry! */ cdb_off = CMDR_OFF + cmd_head + base_command_size; memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb)); @@ -481,47 +550,22 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->scsi_sense_length); UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); - } - else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - struct scatterlist *sg; - int i; - - /* It'd be easier to look at entry's iovec again, but UAM */ - for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { - size_t copy_bytes; - void *to; - void *from; - - copy_bytes = min((size_t)sg->length, - head_to_end(udev->data_tail, udev->data_size)); - - to = kmap_atomic(sg_page(sg)) + sg->offset; - WARN_ON(sg->length + sg->offset > PAGE_SIZE); - from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; - tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to, from, copy_bytes); - - UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); - - /* Uh oh, wrapped the data buffer for this sg's data */ - if (sg->length != copy_bytes) { - from = (void *) udev->mb_addr + udev->data_off + udev->data_tail; - WARN_ON(udev->data_tail); - to += copy_bytes; - copy_bytes = sg->length - copy_bytes; - tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to, from, copy_bytes); - - UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size); - } - - kunmap_atomic(to); - } - + } else if (se_cmd->se_cmd_flags & SCF_BIDI) { + /* Discard data_out buffer */ + UPDATE_HEAD(udev->data_tail, + (size_t)se_cmd->t_data_sg->length, udev->data_size); + + /* Get Data-In buffer */ + gather_and_free_data_area(udev, + se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents); + } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { + gather_and_free_data_area(udev, + se_cmd->t_data_sg, se_cmd->t_data_nents); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size); - } else { - pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction); + } else if (se_cmd->data_direction != DMA_NONE) { + pr_warn("TCMU: data direction was %d!\n", + se_cmd->data_direction); } target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); @@ -910,6 +954,14 @@ static int tcmu_check_pending_cmd(int id, void *p, void *data) return -EINVAL; } +static void tcmu_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct tcmu_dev *udev = TCMU_DEV(dev); + + kfree(udev); +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -935,8 +987,7 @@ static void tcmu_free_device(struct se_device *dev) kfree(udev->uio_info.name); kfree(udev->name); } - - kfree(udev); + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); } enum { @@ -1054,27 +1105,7 @@ tcmu_parse_cdb(struct se_cmd *cmd) return passthrough_parse_cdb(cmd, tcmu_pass_op); } -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type); -TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size); -TB_DEV_ATTR_RO(tcmu, hw_block_size); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors); -TB_DEV_ATTR_RO(tcmu, hw_max_sectors); - -DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth); -TB_DEV_ATTR_RO(tcmu, hw_queue_depth); - -static struct configfs_attribute *tcmu_backend_dev_attrs[] = { - &tcmu_dev_attrib_hw_pi_prot_type.attr, - &tcmu_dev_attrib_hw_block_size.attr, - &tcmu_dev_attrib_hw_max_sectors.attr, - &tcmu_dev_attrib_hw_queue_depth.attr, - NULL, -}; - -static struct se_subsystem_api tcmu_template = { +static const struct target_backend_ops tcmu_ops = { .name = "user", .inquiry_prod = "USER", .inquiry_rev = TCMU_VERSION, @@ -1090,11 +1121,11 @@ static struct se_subsystem_api tcmu_template = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, + .tb_dev_attrib_attrs = passthrough_attrib_attrs, }; static int __init tcmu_module_init(void) { - struct target_backend_cits *tbc = &tcmu_template.tb_cits; int ret; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1117,10 +1148,7 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } - target_core_setup_sub_cits(&tcmu_template); - tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs; - - ret = transport_subsystem_register(&tcmu_template); + ret = transport_backend_register(&tcmu_ops); if (ret) goto out_unreg_genl; @@ -1138,7 +1166,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { - transport_subsystem_release(&tcmu_template); + target_backend_unregister(&tcmu_ops); genl_unregister_family(&tcmu_genl_family); root_device_unregister(tcmu_root_device); kmem_cache_destroy(tcmu_cmd_cache); diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 5ec0d00edaa3..4515f52546f8 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -31,7 +31,6 @@ #include <target/target_core_base.h> #include <target/target_core_backend.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include "target_core_internal.h" #include "target_core_pr.h" @@ -348,8 +347,7 @@ struct xcopy_pt_cmd { unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; }; -static struct se_port xcopy_pt_port; -static struct se_portal_group xcopy_pt_tpg; +struct se_portal_group xcopy_pt_tpg; static struct se_session xcopy_pt_sess; static struct se_node_acl xcopy_pt_nacl; @@ -358,11 +356,6 @@ static char *xcopy_pt_get_fabric_name(void) return "xcopy-pt"; } -static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd) -{ - return 0; -} - static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -423,7 +416,6 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd) static const struct target_core_fabric_ops xcopy_pt_tfo = { .get_fabric_name = xcopy_pt_get_fabric_name, - .get_task_tag = xcopy_pt_get_tag, .get_cmd_state = xcopy_pt_get_cmd_state, .release_cmd = xcopy_pt_release_cmd, .check_stop_free = xcopy_pt_check_stop_free, @@ -445,17 +437,11 @@ int target_xcopy_setup_pt(void) return -ENOMEM; } - memset(&xcopy_pt_port, 0, sizeof(struct se_port)); - INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list); - INIT_LIST_HEAD(&xcopy_pt_port.sep_list); - mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex); - memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group)); INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node); INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list); INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list); - xcopy_pt_port.sep_tpg = &xcopy_pt_tpg; xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo; memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl)); @@ -496,10 +482,6 @@ static void target_xcopy_setup_pt_port( */ if (remote_port) { xpt_cmd->remote_port = remote_port; - pt_cmd->se_lun->lun_sep = &xcopy_pt_port; - pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to" - " cmd->se_lun->lun_sep for X-COPY data PUSH\n", - pt_cmd->se_lun->lun_sep); } else { pt_cmd->se_lun = ec_cmd->se_lun; pt_cmd->se_dev = ec_cmd->se_dev; @@ -519,10 +501,6 @@ static void target_xcopy_setup_pt_port( */ if (remote_port) { xpt_cmd->remote_port = remote_port; - pt_cmd->se_lun->lun_sep = &xcopy_pt_port; - pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to" - " cmd->se_lun->lun_sep for X-COPY data PULL\n", - pt_cmd->se_lun->lun_sep); } else { pt_cmd->se_lun = ec_cmd->se_lun; pt_cmd->se_dev = ec_cmd->se_dev; @@ -574,6 +552,7 @@ static int target_xcopy_setup_pt_cmd( xpt_cmd->xcopy_op = xop; target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port); + cmd->tag = 0; sense_rc = target_setup_cmd_from_cdb(cmd, cdb); if (sense_rc) { ret = -EINVAL; diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index 881deb3d499a..39909dadef3e 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -80,8 +80,8 @@ struct ft_node_auth { * Node ACL for FC remote port session. */ struct ft_node_acl { - struct ft_node_auth node_auth; struct se_node_acl se_node_acl; + struct ft_node_auth node_auth; }; struct ft_lun { @@ -157,7 +157,6 @@ int ft_queue_status(struct se_cmd *); int ft_queue_data_in(struct se_cmd *); int ft_write_pending(struct se_cmd *); int ft_write_pending_status(struct se_cmd *); -u32 ft_get_task_tag(struct se_cmd *); int ft_get_cmd_state(struct se_cmd *); void ft_queue_tm_resp(struct se_cmd *); void ft_aborted_task(struct se_cmd *); diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 1bf78e7c994c..68031723e5be 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -36,7 +36,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "tcm_fc.h" @@ -243,15 +242,6 @@ int ft_write_pending(struct se_cmd *se_cmd) return 0; } -u32 ft_get_task_tag(struct se_cmd *se_cmd) -{ - struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd); - - if (cmd->aborted) - return ~0; - return fc_seq_exch(cmd->seq)->rxid; -} - int ft_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -564,6 +554,7 @@ static void ft_send_work(struct work_struct *work) } fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd); + cmd->se_cmd.tag = fc_seq_exch(cmd->seq)->rxid; /* * Use a single se_cmd->cmd_kref as we expect to release se_cmd * directly from ft_check_stop_free callback in response path. diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 86b699b94c7b..16670933013b 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -39,13 +39,10 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "tcm_fc.h" -static const struct target_core_fabric_ops ft_fabric_ops; - static LIST_HEAD(ft_wwn_list); DEFINE_MUTEX(ft_lport_lock); @@ -194,48 +191,17 @@ static struct configfs_attribute *ft_nacl_base_attrs[] = { * Add ACL for an initiator. The ACL is named arbitrarily. * The port_name and/or node_name are attributes. */ -static struct se_node_acl *ft_add_acl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static int ft_init_nodeacl(struct se_node_acl *nacl, const char *name) { - struct ft_node_acl *acl; - struct ft_tpg *tpg; + struct ft_node_acl *acl = + container_of(nacl, struct ft_node_acl, se_node_acl); u64 wwpn; - u32 q_depth; - - pr_debug("add acl %s\n", name); - tpg = container_of(se_tpg, struct ft_tpg, se_tpg); if (ft_parse_wwn(name, &wwpn, 1) < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; - acl = kzalloc(sizeof(struct ft_node_acl), GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); acl->node_auth.port_name = wwpn; - - q_depth = 32; /* XXX bogus default - get from tpg? */ - return core_tpg_add_initiator_node_acl(&tpg->se_tpg, - &acl->se_node_acl, name, q_depth); -} - -static void ft_del_acl(struct se_node_acl *se_acl) -{ - struct se_portal_group *se_tpg = se_acl->se_tpg; - struct ft_tpg *tpg; - struct ft_node_acl *acl = container_of(se_acl, - struct ft_node_acl, se_node_acl); - - pr_debug("del acl %s\n", - config_item_name(&se_acl->acl_group.cg_item)); - - tpg = container_of(se_tpg, struct ft_tpg, se_tpg); - pr_debug("del acl %p se_acl %p tpg %p se_tpg %p\n", - acl, se_acl, tpg, &tpg->se_tpg); - - core_tpg_del_initiator_node_acl(&tpg->se_tpg, se_acl, 1); - kfree(acl); + return 0; } struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) @@ -245,7 +211,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_node_acl *se_acl; - spin_lock_irq(&se_tpg->acl_node_lock); + mutex_lock(&se_tpg->acl_node_mutex); list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) { acl = container_of(se_acl, struct ft_node_acl, se_node_acl); pr_debug("acl %p port_name %llx\n", @@ -259,33 +225,10 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) break; } } - spin_unlock_irq(&se_tpg->acl_node_lock); + mutex_unlock(&se_tpg->acl_node_mutex); return found; } -static struct se_node_acl *ft_tpg_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct ft_node_acl *acl; - - acl = kzalloc(sizeof(*acl), GFP_KERNEL); - if (!acl) { - pr_err("Unable to allocate struct ft_node_acl\n"); - return NULL; - } - pr_debug("acl %p\n", acl); - return &acl->se_node_acl; -} - -static void ft_tpg_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_acl) -{ - struct ft_node_acl *acl = container_of(se_acl, - struct ft_node_acl, se_node_acl); - - pr_debug("acl %p\n", acl); - kfree(acl); -} - /* * local_port port_group (tpg) ops. */ @@ -333,8 +276,7 @@ static struct se_portal_group *ft_add_tpg( return NULL; } - ret = core_tpg_register(&ft_fabric_ops, wwn, &tpg->se_tpg, - tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP); if (ret < 0) { destroy_workqueue(wq); kfree(tpg); @@ -459,6 +401,11 @@ static struct configfs_attribute *ft_wwn_attrs[] = { NULL, }; +static inline struct ft_tpg *ft_tpg(struct se_portal_group *se_tpg) +{ + return container_of(se_tpg, struct ft_tpg, se_tpg); +} + static char *ft_get_fabric_name(void) { return "fc"; @@ -466,25 +413,16 @@ static char *ft_get_fabric_name(void) static char *ft_get_fabric_wwn(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->lport_wwn->name; + return ft_tpg(se_tpg)->lport_wwn->name; } static u16 ft_get_tag(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - /* * This tag is used when forming SCSI Name identifier in EVPD=1 0x83 * to represent the SCSI Target Port. */ - return tpg->index; -} - -static u32 ft_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; + return ft_tpg(se_tpg)->index; } static int ft_check_false(struct se_portal_group *se_tpg) @@ -498,28 +436,20 @@ static void ft_set_default_node_attr(struct se_node_acl *se_nacl) static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg) { - struct ft_tpg *tpg = se_tpg->se_tpg_fabric_ptr; - - return tpg->index; + return ft_tpg(se_tpg)->index; } static const struct target_core_fabric_ops ft_fabric_ops = { .module = THIS_MODULE, .name = "fc", + .node_acl_size = sizeof(struct ft_node_acl), .get_fabric_name = ft_get_fabric_name, - .get_fabric_proto_ident = fc_get_fabric_proto_ident, .tpg_get_wwn = ft_get_fabric_wwn, .tpg_get_tag = ft_get_tag, - .tpg_get_default_depth = ft_get_default_depth, - .tpg_get_pr_transport_id = fc_get_pr_transport_id, - .tpg_get_pr_transport_id_len = fc_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = fc_parse_pr_out_transport_id, .tpg_check_demo_mode = ft_check_false, .tpg_check_demo_mode_cache = ft_check_false, .tpg_check_demo_mode_write_protect = ft_check_false, .tpg_check_prod_mode_write_protect = ft_check_false, - .tpg_alloc_fabric_acl = ft_tpg_alloc_fabric_acl, - .tpg_release_fabric_acl = ft_tpg_release_fabric_acl, .tpg_get_inst_index = ft_tpg_get_inst_index, .check_stop_free = ft_check_stop_free, .release_cmd = ft_release_cmd, @@ -530,7 +460,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .write_pending = ft_write_pending, .write_pending_status = ft_write_pending_status, .set_default_node_attributes = ft_set_default_node_attr, - .get_task_tag = ft_get_task_tag, .get_cmd_state = ft_get_cmd_state, .queue_data_in = ft_queue_data_in, .queue_status = ft_queue_status, @@ -544,12 +473,7 @@ static const struct target_core_fabric_ops ft_fabric_ops = { .fabric_drop_wwn = &ft_del_wwn, .fabric_make_tpg = &ft_add_tpg, .fabric_drop_tpg = &ft_del_tpg, - .fabric_post_link = NULL, - .fabric_pre_unlink = NULL, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = &ft_add_acl, - .fabric_drop_nodeacl = &ft_del_acl, + .fabric_init_nodeacl = &ft_init_nodeacl, .tfc_wwn_attrs = ft_wwn_attrs, .tfc_tpg_nacl_base_attrs = ft_nacl_base_attrs, diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index fe585d1cce23..4b0fedd6bd4b 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -44,7 +44,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "tcm_fc.h" diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index f2a616d4f2c4..31a9e3fb98c5 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -36,7 +36,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include "tcm_fc.h" diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 978204333c94..d75a66c72750 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -22,6 +22,7 @@ #include <linux/pm_runtime.h> #include <linux/console.h> #include <linux/pm_qos.h> +#include <linux/pm_wakeirq.h> #include <linux/dma-mapping.h> #include "8250.h" @@ -552,17 +553,6 @@ static void omap8250_uart_qos_work(struct work_struct *work) pm_qos_update_request(&priv->pm_qos_request, priv->latency); } -static irqreturn_t omap_wake_irq(int irq, void *dev_id) -{ - struct uart_port *port = dev_id; - int ret; - - ret = port->handle_irq(port); - if (ret) - return IRQ_HANDLED; - return IRQ_NONE; -} - #ifdef CONFIG_SERIAL_8250_DMA static int omap_8250_dma_handle_irq(struct uart_port *port); #endif @@ -596,11 +586,9 @@ static int omap_8250_startup(struct uart_port *port) int ret; if (priv->wakeirq) { - ret = request_irq(priv->wakeirq, omap_wake_irq, - port->irqflags, "uart wakeup irq", port); + ret = dev_pm_set_dedicated_wake_irq(port->dev, priv->wakeirq); if (ret) return ret; - disable_irq(priv->wakeirq); } pm_runtime_get_sync(port->dev); @@ -649,8 +637,7 @@ static int omap_8250_startup(struct uart_port *port) err: pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); return ret; } @@ -682,10 +669,8 @@ static void omap_8250_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(port->dev); pm_runtime_put_autosuspend(port->dev); - free_irq(port->irq, port); - if (priv->wakeirq) - free_irq(priv->wakeirq, port); + dev_pm_clear_wake_irq(port->dev); } static void omap_8250_throttle(struct uart_port *port) @@ -1226,31 +1211,6 @@ static int omap8250_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM - -static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv, - bool enable) -{ - if (!priv->wakeirq) - return; - - if (enable) - enable_irq(priv->wakeirq); - else - disable_irq_nosync(priv->wakeirq); -} - -static void omap8250_enable_wakeup(struct omap8250_priv *priv, - bool enable) -{ - if (enable == priv->wakeups_enabled) - return; - - omap8250_enable_wakeirq(priv, enable); - priv->wakeups_enabled = enable; -} -#endif - #ifdef CONFIG_PM_SLEEP static int omap8250_prepare(struct device *dev) { @@ -1277,11 +1237,6 @@ static int omap8250_suspend(struct device *dev) serial8250_suspend_port(priv->line); flush_work(&priv->qos_work); - - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, true); - else - omap8250_enable_wakeup(priv, false); return 0; } @@ -1289,9 +1244,6 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); - if (device_may_wakeup(dev)) - omap8250_enable_wakeup(priv, false); - serial8250_resume_port(priv->line); return 0; } @@ -1333,7 +1285,6 @@ static int omap8250_runtime_suspend(struct device *dev) return -EBUSY; } - omap8250_enable_wakeup(priv, true); if (up->dma) omap_8250_rx_dma(up, UART_IIR_RX_TIMEOUT); @@ -1354,7 +1305,6 @@ static int omap8250_runtime_resume(struct device *dev) return 0; up = serial8250_get_port(priv->line); - omap8250_enable_wakeup(priv, false); loss_cntx = omap8250_lost_context(up); if (loss_cntx) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 7f49172ccd86..7a2172b5e93c 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -38,6 +38,7 @@ #include <linux/serial_core.h> #include <linux/irq.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/gpio.h> @@ -160,7 +161,6 @@ struct uart_omap_port { unsigned long port_activity; int context_loss_cnt; u32 errata; - u8 wakeups_enabled; u32 features; int rts_gpio; @@ -209,28 +209,11 @@ static int serial_omap_get_context_loss_count(struct uart_omap_port *up) return pdata->get_context_loss_count(up->dev); } -static inline void serial_omap_enable_wakeirq(struct uart_omap_port *up, - bool enable) -{ - if (!up->wakeirq) - return; - - if (enable) - enable_irq(up->wakeirq); - else - disable_irq_nosync(up->wakeirq); -} - +/* REVISIT: Remove this when omap3 boots in device tree only mode */ static void serial_omap_enable_wakeup(struct uart_omap_port *up, bool enable) { struct omap_uart_port_info *pdata = dev_get_platdata(up->dev); - if (enable == up->wakeups_enabled) - return; - - serial_omap_enable_wakeirq(up, enable); - up->wakeups_enabled = enable; - if (!pdata || !pdata->enable_wakeup) return; @@ -750,13 +733,11 @@ static int serial_omap_startup(struct uart_port *port) /* Optional wake-up IRQ */ if (up->wakeirq) { - retval = request_irq(up->wakeirq, serial_omap_irq, - up->port.irqflags, up->name, up); + retval = dev_pm_set_dedicated_wake_irq(up->dev, up->wakeirq); if (retval) { free_irq(up->port.irq, up); return retval; } - disable_irq(up->wakeirq); } dev_dbg(up->port.dev, "serial_omap_startup+%d\n", up->port.line); @@ -845,8 +826,7 @@ static void serial_omap_shutdown(struct uart_port *port) pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); free_irq(up->port.irq, up); - if (up->wakeirq) - free_irq(up->wakeirq, up); + dev_pm_clear_wake_irq(up->dev); } static void serial_omap_uart_qos_work(struct work_struct *work) @@ -1139,13 +1119,6 @@ serial_omap_pm(struct uart_port *port, unsigned int state, serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); - if (!device_may_wakeup(up->dev)) { - if (!state) - pm_runtime_forbid(up->dev); - else - pm_runtime_allow(up->dev); - } - pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); } diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3cc109f3c9c8..d2259c663996 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2936,7 +2936,7 @@ int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg, if (fsg_lun_is_open(lun)) { p = "(error)"; if (pathbuf) { - p = d_path(&lun->filp->f_path, pathbuf, PATH_MAX); + p = file_path(lun->filp, pathbuf, PATH_MAX); if (IS_ERR(p)) p = "(error)"; } diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index 648f9e489b39..d62683017cf3 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -341,7 +341,7 @@ ssize_t fsg_show_file(struct fsg_lun *curlun, struct rw_semaphore *filesem, down_read(filesem); if (fsg_lun_is_open(curlun)) { /* Get the complete pathname */ - p = d_path(&curlun->filp->f_path, buf, PAGE_SIZE - 1); + p = file_path(curlun->filp, buf, PAGE_SIZE - 1); if (IS_ERR(p)) rc = PTR_ERR(p); else { diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 6ce932f90ef8..c3c48088fced 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -20,7 +20,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include <asm/unaligned.h> @@ -28,8 +27,6 @@ USB_GADGET_COMPOSITE_OPTIONS(); -static const struct target_core_fabric_ops usbg_ops; - static inline struct f_uas *to_f_uas(struct usb_function *f) { return container_of(f, struct f_uas, function); @@ -1111,6 +1108,7 @@ static int usbg_submit_command(struct f_uas *fu, memcpy(cmd->cmd_buf, cmd_iu->cdb, cmd_len); cmd->tag = be16_to_cpup(&cmd_iu->tag); + cmd->se_cmd.tag = cmd->tag; if (fu->flags & USBG_USE_STREAMS) { if (cmd->tag > UASP_SS_EP_COMP_NUM_STREAMS) goto err; @@ -1244,6 +1242,7 @@ static int bot_submit_command(struct f_uas *fu, cmd->unpacked_lun = cbw->Lun; cmd->is_read = cbw->Flags & US_BULK_FLAG_IN ? 1 : 0; cmd->data_len = le32_to_cpu(cbw->DataTransferLength); + cmd->se_cmd.tag = le32_to_cpu(cmd->bot_tag); INIT_WORK(&cmd->work, bot_cmd_work); ret = queue_work(tpg->workqueue, &cmd->work); @@ -1273,23 +1272,6 @@ static char *usbg_get_fabric_name(void) return "usb_gadget"; } -static u8 usbg_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - u8 proto_id; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - proto_id = sas_get_fabric_proto_ident(se_tpg); - break; - } - - return proto_id; -} - static char *usbg_get_fabric_wwn(struct se_portal_group *se_tpg) { struct usbg_tpg *tpg = container_of(se_tpg, @@ -1306,97 +1288,6 @@ static u16 usbg_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 usbg_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 usbg_get_pr_transport_id( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - int ret = 0; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - ret = sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - break; - } - - return ret; -} - -static u32 usbg_get_pr_transport_id_len( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - int ret = 0; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - ret = sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - break; - } - - return ret; -} - -static char *usbg_parse_pr_out_transport_id( - struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct usbg_tpg *tpg = container_of(se_tpg, - struct usbg_tpg, se_tpg); - struct usbg_tport *tport = tpg->tport; - char *tid = NULL; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - default: - tid = sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - } - - return tid; -} - -static struct se_node_acl *usbg_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct usbg_nacl *nacl; - - nacl = kzalloc(sizeof(struct usbg_nacl), GFP_KERNEL); - if (!nacl) - return NULL; - - return &nacl->se_node_acl; -} - -static void usbg_release_fabric_acl( - struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct usbg_nacl *nacl = container_of(se_nacl, - struct usbg_nacl, se_node_acl); - kfree(nacl); -} - static u32 usbg_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1447,18 +1338,6 @@ static void usbg_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 usbg_get_task_tag(struct se_cmd *se_cmd) -{ - struct usbg_cmd *cmd = container_of(se_cmd, struct usbg_cmd, - se_cmd); - struct f_uas *fu = cmd->fu; - - if (fu->flags & USBG_IS_BOT) - return le32_to_cpu(cmd->bot_tag); - else - return cmd->tag; -} - static int usbg_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1488,50 +1367,11 @@ static const char *usbg_check_wwn(const char *name) return n; } -static struct se_node_acl *usbg_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) -{ - struct se_node_acl *se_nacl, *se_nacl_new; - struct usbg_nacl *nacl; - u64 wwpn = 0; - u32 nexus_depth; - const char *wnn_name; - - wnn_name = usbg_check_wwn(name); - if (!wnn_name) - return ERR_PTR(-EINVAL); - se_nacl_new = usbg_alloc_fabric_acl(se_tpg); - if (!(se_nacl_new)) - return ERR_PTR(-ENOMEM); - - nexus_depth = 1; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - usbg_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - /* - * Locate our struct usbg_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct usbg_nacl, se_node_acl); - nacl->iport_wwpn = wwpn; - snprintf(nacl->iport_name, sizeof(nacl->iport_name), "%s", name); - return se_nacl; -} - -static void usbg_drop_nodeacl(struct se_node_acl *se_acl) +static int usbg_init_nodeacl(struct se_node_acl *se_nacl, const char *name) { - struct usbg_nacl *nacl = container_of(se_acl, - struct usbg_nacl, se_node_acl); - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); + if (!usbg_check_wwn(name)) + return -EINVAL; + return 0; } struct usbg_tpg *the_only_tpg_I_currently_have; @@ -1571,8 +1411,11 @@ static struct se_portal_group *usbg_make_tpg( tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, tpg, - TRANSPORT_TPG_TYPE_NORMAL); + /* + * SPC doesn't assign a protocol identifier for USB-SCSI, so we + * pretend to be SAS.. + */ + ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SAS); if (ret < 0) { destroy_workqueue(tpg->workqueue); kfree(tpg); @@ -1866,19 +1709,12 @@ static const struct target_core_fabric_ops usbg_ops = { .module = THIS_MODULE, .name = "usb_gadget", .get_fabric_name = usbg_get_fabric_name, - .get_fabric_proto_ident = usbg_get_fabric_proto_ident, .tpg_get_wwn = usbg_get_fabric_wwn, .tpg_get_tag = usbg_get_tag, - .tpg_get_default_depth = usbg_get_default_depth, - .tpg_get_pr_transport_id = usbg_get_pr_transport_id, - .tpg_get_pr_transport_id_len = usbg_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = usbg_parse_pr_out_transport_id, .tpg_check_demo_mode = usbg_check_true, .tpg_check_demo_mode_cache = usbg_check_false, .tpg_check_demo_mode_write_protect = usbg_check_false, .tpg_check_prod_mode_write_protect = usbg_check_false, - .tpg_alloc_fabric_acl = usbg_alloc_fabric_acl, - .tpg_release_fabric_acl = usbg_release_fabric_acl, .tpg_get_inst_index = usbg_tpg_get_inst_index, .release_cmd = usbg_release_cmd, .shutdown_session = usbg_shutdown_session, @@ -1888,7 +1724,6 @@ static const struct target_core_fabric_ops usbg_ops = { .write_pending = usbg_send_write_request, .write_pending_status = usbg_write_pending_status, .set_default_node_attributes = usbg_set_default_node_attrs, - .get_task_tag = usbg_get_task_tag, .get_cmd_state = usbg_get_cmd_state, .queue_data_in = usbg_send_read_response, .queue_status = usbg_send_status_response, @@ -1902,10 +1737,7 @@ static const struct target_core_fabric_ops usbg_ops = { .fabric_drop_tpg = usbg_drop_tpg, .fabric_post_link = usbg_port_link, .fabric_pre_unlink = usbg_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = usbg_make_nodeacl, - .fabric_drop_nodeacl = usbg_drop_nodeacl, + .fabric_init_nodeacl = usbg_init_nodeacl, .tfc_wwn_attrs = usbg_wwn_attrs, .tfc_tpg_base_attrs = usbg_base_attrs, diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.h b/drivers/usb/gadget/legacy/tcm_usb_gadget.h index 9fb3544cc80f..0b749e1aa2f1 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.h +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.h @@ -24,15 +24,6 @@ enum { #define USB_G_ALT_INT_BBB 0 #define USB_G_ALT_INT_UAS 1 -struct usbg_nacl { - /* Binary World Wide unique Port Name for SAS Initiator port */ - u64 iport_wwpn; - /* ASCII formatted WWPN for Sas Initiator port */ - char iport_name[USBG_NAMELEN]; - /* Returned by usbg_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct tcm_usbg_nexus { struct se_session *tvn_se_sess; }; @@ -52,8 +43,6 @@ struct usbg_tpg { }; struct usbg_tport { - /* SCSI protocol the tport is providing */ - u8 tport_proto_id; /* Binary World Wide unique Port Name for SAS Target port */ u64 tport_wwpn; /* ASCII formatted WWPN for SAS Target port */ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 017a1e8a8f6f..533eaf04f12f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -32,3 +32,18 @@ config VHOST ---help--- This option is selected by any driver which needs to access the core of vhost. + +config VHOST_CROSS_ENDIAN_LEGACY + bool "Cross-endian support for vhost" + default n + ---help--- + This option allows vhost to support guests with a different byte + ordering from host while using legacy virtio. + + Userspace programs can control the feature using the + VHOST_SET_VRING_ENDIAN and VHOST_GET_VRING_ENDIAN ioctls. + + This is only useful on a few platforms (ppc64 and arm64). Since it + adds some overhead, it is disabled by default. + + If unsure, say "N". diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 55722feeb898..dfcc02c93648 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -43,7 +43,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/target_core_fabric_configfs.h> -#include <target/target_core_configfs.h> #include <target/configfs_macros.h> #include <linux/vhost.h> #include <linux/virtio_scsi.h> @@ -117,15 +116,6 @@ struct vhost_scsi_nexus { struct se_session *tvn_se_sess; }; -struct vhost_scsi_nacl { - /* Binary World Wide unique Port Name for Vhost Initiator port */ - u64 iport_wwpn; - /* ASCII formatted WWPN for Sas Initiator port */ - char iport_name[VHOST_SCSI_NAMELEN]; - /* Returned by vhost_scsi_make_nodeacl() */ - struct se_node_acl se_node_acl; -}; - struct vhost_scsi_tpg { /* Vhost port target portal group tag for TCM */ u16 tport_tpgt; @@ -218,7 +208,6 @@ struct vhost_scsi { int vs_events_nr; /* num of pending events, protected by vq->mutex */ }; -static struct target_core_fabric_ops vhost_scsi_ops; static struct workqueue_struct *vhost_scsi_workqueue; /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ @@ -299,28 +288,6 @@ static char *vhost_scsi_get_fabric_name(void) return "vhost"; } -static u8 vhost_scsi_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *vhost_scsi_get_fabric_wwn(struct se_portal_group *se_tpg) { struct vhost_scsi_tpg *tpg = container_of(se_tpg, @@ -337,102 +304,6 @@ static u16 vhost_scsi_get_tpgt(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 vhost_scsi_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 -vhost_scsi_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 -vhost_scsi_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -static char * -vhost_scsi_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct vhost_scsi_tpg *tpg = container_of(se_tpg, - struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using" - " SAS emulation\n", tport->tport_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg) { struct vhost_scsi_tpg *tpg = container_of(se_tpg, @@ -441,29 +312,6 @@ static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg) return tpg->tv_fabric_prot_type; } -static struct se_node_acl * -vhost_scsi_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - struct vhost_scsi_nacl *nacl; - - nacl = kzalloc(sizeof(struct vhost_scsi_nacl), GFP_KERNEL); - if (!nacl) { - pr_err("Unable to allocate struct vhost_scsi_nacl\n"); - return NULL; - } - - return &nacl->se_node_acl; -} - -static void -vhost_scsi_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - struct vhost_scsi_nacl *nacl = container_of(se_nacl, - struct vhost_scsi_nacl, se_node_acl); - kfree(nacl); -} - static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -521,11 +369,6 @@ static void vhost_scsi_set_default_node_attrs(struct se_node_acl *nacl) return; } -static u32 vhost_scsi_get_task_tag(struct se_cmd *se_cmd) -{ - return 0; -} - static int vhost_scsi_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -609,7 +452,7 @@ static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd) static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) { - return target_put_sess_cmd(se_cmd->se_sess, se_cmd); + return target_put_sess_cmd(se_cmd); } static void @@ -970,6 +813,7 @@ static void vhost_scsi_submission_work(struct work_struct *work) } tv_nexus = cmd->tvc_nexus; + se_cmd->tag = 0; rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, @@ -1824,50 +1668,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, mutex_unlock(&vhost_scsi_mutex); } -static struct se_node_acl * -vhost_scsi_make_nodeacl(struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) -{ - struct se_node_acl *se_nacl, *se_nacl_new; - struct vhost_scsi_nacl *nacl; - u64 wwpn = 0; - u32 nexus_depth; - - /* vhost_scsi_parse_wwn(name, &wwpn, 1) < 0) - return ERR_PTR(-EINVAL); */ - se_nacl_new = vhost_scsi_alloc_fabric_acl(se_tpg); - if (!se_nacl_new) - return ERR_PTR(-ENOMEM); - - nexus_depth = 1; - /* - * se_nacl_new may be released by core_tpg_add_initiator_node_acl() - * when converting a NodeACL from demo mode -> explict - */ - se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, - name, nexus_depth); - if (IS_ERR(se_nacl)) { - vhost_scsi_release_fabric_acl(se_tpg, se_nacl_new); - return se_nacl; - } - /* - * Locate our struct vhost_scsi_nacl and set the FC Nport WWPN - */ - nacl = container_of(se_nacl, struct vhost_scsi_nacl, se_node_acl); - nacl->iport_wwpn = wwpn; - - return se_nacl; -} - -static void vhost_scsi_drop_nodeacl(struct se_node_acl *se_acl) -{ - struct vhost_scsi_nacl *nacl = container_of(se_acl, - struct vhost_scsi_nacl, se_node_acl); - core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1); - kfree(nacl); -} - static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus, struct se_session *se_sess) { @@ -2202,8 +2002,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&vhost_scsi_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; @@ -2327,20 +2126,13 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .module = THIS_MODULE, .name = "vhost", .get_fabric_name = vhost_scsi_get_fabric_name, - .get_fabric_proto_ident = vhost_scsi_get_fabric_proto_ident, .tpg_get_wwn = vhost_scsi_get_fabric_wwn, .tpg_get_tag = vhost_scsi_get_tpgt, - .tpg_get_default_depth = vhost_scsi_get_default_depth, - .tpg_get_pr_transport_id = vhost_scsi_get_pr_transport_id, - .tpg_get_pr_transport_id_len = vhost_scsi_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = vhost_scsi_parse_pr_out_transport_id, .tpg_check_demo_mode = vhost_scsi_check_true, .tpg_check_demo_mode_cache = vhost_scsi_check_true, .tpg_check_demo_mode_write_protect = vhost_scsi_check_false, .tpg_check_prod_mode_write_protect = vhost_scsi_check_false, .tpg_check_prot_fabric_only = vhost_scsi_check_prot_fabric_only, - .tpg_alloc_fabric_acl = vhost_scsi_alloc_fabric_acl, - .tpg_release_fabric_acl = vhost_scsi_release_fabric_acl, .tpg_get_inst_index = vhost_scsi_tpg_get_inst_index, .release_cmd = vhost_scsi_release_cmd, .check_stop_free = vhost_scsi_check_stop_free, @@ -2351,7 +2143,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .write_pending = vhost_scsi_write_pending, .write_pending_status = vhost_scsi_write_pending_status, .set_default_node_attributes = vhost_scsi_set_default_node_attrs, - .get_task_tag = vhost_scsi_get_task_tag, .get_cmd_state = vhost_scsi_get_cmd_state, .queue_data_in = vhost_scsi_queue_data_in, .queue_status = vhost_scsi_queue_status, @@ -2366,10 +2157,6 @@ static struct target_core_fabric_ops vhost_scsi_ops = { .fabric_drop_tpg = vhost_scsi_drop_tpg, .fabric_post_link = vhost_scsi_port_link, .fabric_pre_unlink = vhost_scsi_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, - .fabric_make_nodeacl = vhost_scsi_make_nodeacl, - .fabric_drop_nodeacl = vhost_scsi_drop_nodeacl, .tfc_wwn_attrs = vhost_scsi_wwn_attrs, .tfc_tpg_base_attrs = vhost_scsi_tpg_attrs, diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee28266fd07..9e8e004bb1c3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -36,6 +36,77 @@ enum { #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ + vq->user_be = !virtio_legacy_is_little_endian(); +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + struct vhost_vring_state s; + + if (vq->private_data) + return -EBUSY; + + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + + if (s.num != VHOST_VRING_LITTLE_ENDIAN && + s.num != VHOST_VRING_BIG_ENDIAN) + return -EINVAL; + + vq->user_be = s.num; + + return 0; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + struct vhost_vring_state s = { + .index = idx, + .num = vq->user_be + }; + + if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + + return 0; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + /* Note for legacy virtio: user_be is initialized at reset time + * according to the host endianness. If userspace does not set an + * explicit endianness, the default behavior is native endian, as + * expected by legacy virtio. + */ + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; +} +#else +static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +{ +} + +static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq->is_le = true; +} +#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -199,6 +270,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call = NULL; vq->log_ctx = NULL; vq->memory = NULL; + vq->is_le = virtio_legacy_is_little_endian(); + vhost_vq_reset_user_be(vq); } static int vhost_worker(void *data) @@ -806,6 +879,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } else filep = eventfp; break; + case VHOST_SET_VRING_ENDIAN: + r = vhost_set_vring_endian(vq, argp); + break; + case VHOST_GET_VRING_ENDIAN: + r = vhost_get_vring_endian(vq, idx, argp); + break; default: r = -ENOIOCTLCMD; } @@ -1044,8 +1123,12 @@ int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq->private_data) + if (!vq->private_data) { + vq->is_le = virtio_legacy_is_little_endian(); return 0; + } + + vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 8c1c792900ba..ce6f6da4b09f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -106,6 +106,14 @@ struct vhost_virtqueue { /* Log write descriptors */ void __user *log_base; struct vhost_log *log; + + /* Ring endianness. Defaults to legacy native endianness. + * Set to true when starting a modern virtio device. */ + bool is_le; +#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY + /* Ring endianness requested by userspace for cross-endian support. */ + bool user_be; +#endif }; struct vhost_dev { @@ -173,34 +181,39 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) return vq->acked_features & (1ULL << bit); } +static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) +{ + return vq->is_le; +} + /* Memory accessors */ static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val) { - return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val) { - return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(vhost_is_little_endian(vq), val); } static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val) { - return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val) { - return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(vhost_is_little_endian(vq), val); } static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val) { - return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val) { - return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(vhost_is_little_endian(vq), val); } #endif diff --git a/drivers/video/fbdev/omap2/dss/dss.c b/drivers/video/fbdev/omap2/dss/dss.c index 612b093831d5..9200a8668b49 100644 --- a/drivers/video/fbdev/omap2/dss/dss.c +++ b/drivers/video/fbdev/omap2/dss/dss.c @@ -1225,6 +1225,15 @@ static int dss_add_child_component(struct device *dev, void *data) { struct component_match **match = data; + /* + * HACK + * We don't have a working driver for rfbi, so skip it here always. + * Otherwise dss will never get probed successfully, as it will wait + * for rfbi to get probed. + */ + if (strstr(dev_name(dev), "rfbi")) + return 0; + component_match_add(dev->parent, match, dss_component_compare, dev); return 0; diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 86621fabbb8b..735355b0e023 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -121,6 +121,7 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_3 0x0004a0 #define REG_4 0x000600 #define REG_6 0x000800 +#define REG_7 0x000804 #define REG_8 0x000820 #define REG_9 0x000a04 #define REG_10 0x018000 @@ -135,6 +136,8 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_21 0x200218 #define REG_22 0x0005a0 #define REG_23 0x0005c0 +#define REG_24 0x000808 +#define REG_25 0x000b00 #define REG_26 0x200118 #define REG_27 0x200308 #define REG_32 0x21003c @@ -429,6 +432,9 @@ ARTIST_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) #define SET_LENXY_START_RECFILL(fb, lenxy) \ WRITE_WORD(lenxy, fb, REG_9) +#define SETUP_COPYAREA(fb) \ + WRITE_BYTE(0, fb, REG_16b1) + static void HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) { @@ -1004,6 +1010,36 @@ stifb_blank(int blank_mode, struct fb_info *info) return 0; } +static void +stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + SETUP_COPYAREA(fb); + + SETUP_HW(fb); + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb, + IBOvals(RopSrc, MaskAddrOffset(0), + BitmapExtent08, StaticReg(1), + DataDynamic, MaskOtc, BGx(0), FGx(0))); + + WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24); + WRITE_WORD(((area->width << 16) | area->height), fb, REG_7); + WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1069,7 +1105,7 @@ static struct fb_ops stifb_ops = { .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, .fb_fillrect = cfb_fillrect, - .fb_copyarea = cfb_copyarea, + .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1258,7 +1294,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->fbops = &stifb_ops; info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len); info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT; + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 5447b8186332..78f804af6c20 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -507,10 +507,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, if (rc) goto err_enable_device; - rc = pci_request_regions(pci_dev, "virtio-pci"); - if (rc) - goto err_request_regions; - if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ @@ -540,8 +536,6 @@ err_register: else virtio_pci_modern_remove(vp_dev); err_probe: - pci_release_regions(pci_dev); -err_request_regions: pci_disable_device(pci_dev); err_enable_device: kfree(vp_dev); @@ -559,7 +553,6 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) else virtio_pci_modern_remove(vp_dev); - pci_release_regions(pci_dev); pci_disable_device(pci_dev); } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 28ee4e56badf..b976d968e793 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -75,6 +75,8 @@ struct virtio_pci_device { /* Multiply queue_notify_off by this value. (non-legacy mode). */ u32 notify_offset_multiplier; + int modern_bars; + /* Legacy only field */ /* the IO mapping for the PCI config space */ void __iomem *ioaddr; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 256a5278a515..48bc9797e530 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -215,6 +215,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) { struct pci_dev *pci_dev = vp_dev->pci_dev; + int rc; /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) @@ -226,9 +227,14 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) return -ENODEV; } + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + if (rc) + return rc; + + rc = -ENOMEM; vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); if (!vp_dev->ioaddr) - return -ENOMEM; + goto err_iomap; vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; @@ -246,6 +252,10 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->del_vq = del_vq; return 0; + +err_iomap: + pci_release_region(pci_dev, 0); + return rc; } void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) @@ -253,4 +263,5 @@ void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) struct pci_dev *pci_dev = vp_dev->pci_dev; pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_region(pci_dev, 0); } diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e88e0997a889..8e5cf194cc0b 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -499,7 +499,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { * Returns offset of the capability, or 0. */ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, - u32 ioresource_types) + u32 ioresource_types, int *bars) { int pos; @@ -520,8 +520,10 @@ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, if (type == cfg_type) { if (pci_resource_len(dev, bar) && - pci_resource_flags(dev, bar) & ioresource_types) + pci_resource_flags(dev, bar) & ioresource_types) { + *bars |= (1 << bar); return pos; + } } } return 0; @@ -617,7 +619,8 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) /* check for a common config: if not, use legacy mode (bar 0). */ common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); if (!common) { dev_info(&pci_dev->dev, "virtio_pci: leaving for legacy driver\n"); @@ -626,9 +629,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) /* If common is there, these should be too... */ isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); if (!isr || !notify) { dev_err(&pci_dev->dev, "virtio_pci: missing capabilities %i/%i/%i\n", @@ -640,7 +645,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) * device-specific configuration. */ device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG, - IORESOURCE_IO | IORESOURCE_MEM); + IORESOURCE_IO | IORESOURCE_MEM, + &vp_dev->modern_bars); + + err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars, + "virtio-pci-modern"); + if (err) + return err; err = -EINVAL; vp_dev->common = map_capability(pci_dev, common, @@ -727,4 +738,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) pci_iounmap(pci_dev, vp_dev->notify_base); pci_iounmap(pci_dev, vp_dev->isr); pci_iounmap(pci_dev, vp_dev->common); + pci_release_selected_regions(pci_dev, vp_dev->modern_bars); } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 39223c3e99ad..9eeefd7cad41 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -53,7 +53,6 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> -#include <target/target_core_configfs.h> #include <target/target_core_fabric_configfs.h> #include <asm/hypervisor.h> @@ -201,8 +200,6 @@ static LIST_HEAD(scsiback_free_pages); static DEFINE_MUTEX(scsiback_mutex); static LIST_HEAD(scsiback_list); -static const struct target_core_fabric_ops scsiback_ops; - static void scsiback_get(struct vscsibk_info *info) { atomic_inc(&info->nr_unreplied_reqs); @@ -397,6 +394,7 @@ static void scsiback_cmd_exec(struct vscsibk_pend *pending_req) memset(se_cmd, 0, sizeof(*se_cmd)); scsiback_get(pending_req->info); + se_cmd->tag = pending_req->rqid; rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd, pending_req->sense_buffer, pending_req->v2p->lun, pending_req->data_len, 0, @@ -863,7 +861,8 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, struct list_head *head = &(info->v2p_entry_lists); unsigned long flags; char *lunp; - unsigned int lun; + unsigned long long unpacked_lun; + struct se_lun *se_lun; struct scsiback_tpg *tpg_entry, *tpg = NULL; char *error = "doesn't exist"; @@ -874,24 +873,27 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, } *lunp = 0; lunp++; - if (kstrtouint(lunp, 10, &lun) || lun >= TRANSPORT_MAX_LUNS_PER_TPG) { + err = kstrtoull(lunp, 10, &unpacked_lun); + if (err < 0) { pr_err("lun number not valid: %s\n", lunp); - return -EINVAL; + return err; } mutex_lock(&scsiback_mutex); list_for_each_entry(tpg_entry, &scsiback_list, tv_tpg_list) { if (!strcmp(phy, tpg_entry->tport->tport_name) || !strcmp(phy, tpg_entry->param_alias)) { - spin_lock(&tpg_entry->se_tpg.tpg_lun_lock); - if (tpg_entry->se_tpg.tpg_lun_list[lun]->lun_status == - TRANSPORT_LUN_STATUS_ACTIVE) { - if (!tpg_entry->tpg_nexus) - error = "nexus undefined"; - else - tpg = tpg_entry; + mutex_lock(&tpg_entry->se_tpg.tpg_lun_mutex); + hlist_for_each_entry(se_lun, &tpg_entry->se_tpg.tpg_lun_hlist, link) { + if (se_lun->unpacked_lun == unpacked_lun) { + if (!tpg_entry->tpg_nexus) + error = "nexus undefined"; + else + tpg = tpg_entry; + break; + } } - spin_unlock(&tpg_entry->se_tpg.tpg_lun_lock); + mutex_unlock(&tpg_entry->se_tpg.tpg_lun_mutex); break; } } @@ -903,7 +905,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, mutex_unlock(&scsiback_mutex); if (!tpg) { - pr_err("%s:%d %s\n", phy, lun, error); + pr_err("%s:%llu %s\n", phy, unpacked_lun, error); return -ENODEV; } @@ -931,7 +933,7 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info, kref_init(&new->kref); new->v = *v; new->tpg = tpg; - new->lun = lun; + new->lun = unpacked_lun; list_add_tail(&new->l, head); out: @@ -1251,28 +1253,6 @@ static char *scsiback_dump_proto_id(struct scsiback_tport *tport) return "Unknown"; } -static u8 scsiback_get_fabric_proto_ident(struct se_portal_group *se_tpg) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_FCP: - return fc_get_fabric_proto_ident(se_tpg); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_fabric_proto_ident(se_tpg); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_fabric_proto_ident(se_tpg); -} - static char *scsiback_get_fabric_wwn(struct se_portal_group *se_tpg) { struct scsiback_tpg *tpg = container_of(se_tpg, @@ -1289,102 +1269,6 @@ static u16 scsiback_get_tag(struct se_portal_group *se_tpg) return tpg->tport_tpgt; } -static u32 scsiback_get_default_depth(struct se_portal_group *se_tpg) -{ - return 1; -} - -static u32 -scsiback_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg, - format_code, buf); -} - -static u32 -scsiback_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_FCP: - return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - case SCSI_PROTOCOL_ISCSI: - return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg, - format_code); -} - -static char * -scsiback_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) -{ - struct scsiback_tpg *tpg = container_of(se_tpg, - struct scsiback_tpg, se_tpg); - struct scsiback_tport *tport = tpg->tport; - - switch (tport->tport_proto_id) { - case SCSI_PROTOCOL_SAS: - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_FCP: - return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - case SCSI_PROTOCOL_ISCSI: - return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); - default: - pr_err("Unknown tport_proto_id: 0x%02x, using SAS emulation\n", - tport->tport_proto_id); - break; - } - - return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len, - port_nexus_ptr); -} - static struct se_wwn * scsiback_make_tport(struct target_fabric_configfs *tf, struct config_group *group, @@ -1451,19 +1335,6 @@ static void scsiback_drop_tport(struct se_wwn *wwn) kfree(tport); } -static struct se_node_acl * -scsiback_alloc_fabric_acl(struct se_portal_group *se_tpg) -{ - return kzalloc(sizeof(struct se_node_acl), GFP_KERNEL); -} - -static void -scsiback_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) -{ - kfree(se_nacl); -} - static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg) { return 1; @@ -1522,14 +1393,6 @@ static void scsiback_set_default_node_attrs(struct se_node_acl *nacl) { } -static u32 scsiback_get_task_tag(struct se_cmd *se_cmd) -{ - struct vscsibk_pend *pending_req = container_of(se_cmd, - struct vscsibk_pend, se_cmd); - - return pending_req->rqid; -} - static int scsiback_get_cmd_state(struct se_cmd *se_cmd) { return 0; @@ -1898,8 +1761,7 @@ scsiback_make_tpg(struct se_wwn *wwn, tpg->tport = tport; tpg->tport_tpgt = tpgt; - ret = core_tpg_register(&scsiback_ops, wwn, - &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); + ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id); if (ret < 0) { kfree(tpg); return NULL; @@ -1944,23 +1806,15 @@ static const struct target_core_fabric_ops scsiback_ops = { .module = THIS_MODULE, .name = "xen-pvscsi", .get_fabric_name = scsiback_get_fabric_name, - .get_fabric_proto_ident = scsiback_get_fabric_proto_ident, .tpg_get_wwn = scsiback_get_fabric_wwn, .tpg_get_tag = scsiback_get_tag, - .tpg_get_default_depth = scsiback_get_default_depth, - .tpg_get_pr_transport_id = scsiback_get_pr_transport_id, - .tpg_get_pr_transport_id_len = scsiback_get_pr_transport_id_len, - .tpg_parse_pr_out_transport_id = scsiback_parse_pr_out_transport_id, .tpg_check_demo_mode = scsiback_check_true, .tpg_check_demo_mode_cache = scsiback_check_true, .tpg_check_demo_mode_write_protect = scsiback_check_false, .tpg_check_prod_mode_write_protect = scsiback_check_false, - .tpg_alloc_fabric_acl = scsiback_alloc_fabric_acl, - .tpg_release_fabric_acl = scsiback_release_fabric_acl, .tpg_get_inst_index = scsiback_tpg_get_inst_index, .check_stop_free = scsiback_check_stop_free, .release_cmd = scsiback_release_cmd, - .put_session = NULL, .shutdown_session = scsiback_shutdown_session, .close_session = scsiback_close_session, .sess_get_index = scsiback_sess_get_index, @@ -1968,7 +1822,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .write_pending = scsiback_write_pending, .write_pending_status = scsiback_write_pending_status, .set_default_node_attributes = scsiback_set_default_node_attrs, - .get_task_tag = scsiback_get_task_tag, .get_cmd_state = scsiback_get_cmd_state, .queue_data_in = scsiback_queue_data_in, .queue_status = scsiback_queue_status, @@ -1983,12 +1836,6 @@ static const struct target_core_fabric_ops scsiback_ops = { .fabric_drop_tpg = scsiback_drop_tpg, .fabric_post_link = scsiback_port_link, .fabric_pre_unlink = scsiback_port_unlink, - .fabric_make_np = NULL, - .fabric_drop_np = NULL, -#if 0 - .fabric_make_nodeacl = scsiback_make_nodeacl, - .fabric_drop_nodeacl = scsiback_drop_nodeacl, -#endif .tfc_wwn_attrs = scsiback_wwn_attrs, .tfc_tpg_base_attrs = scsiback_tpg_attrs, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 510040b04c96..b1dc51888048 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 09e4433717b8..e8aa57dc8d6d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index cffe8370fb44..c69a87eaf57d 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -64,7 +64,7 @@ struct affs_inode_info { /* short cut to get to the affs specific inode data */ static inline struct affs_inode_info *AFFS_I(struct inode *inode) { - return list_entry(inode, struct affs_inode_info, vfs_inode); + return container_of(inode, struct affs_inode_info, vfs_inode); } /* diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5b700ef1e59d..c37149b929be 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -238,11 +238,6 @@ static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) return d_inode(sbi->sb->s_root)->i_ino; } -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static inline void __autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/befs/befs.h b/fs/befs/befs.h index 1fead8d56a98..35d19e8731e3 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -112,7 +112,7 @@ BEFS_SB(const struct super_block *super) static inline struct befs_inode_info * BEFS_I(const struct inode *inode) { - return list_entry(inode, struct befs_inode_info, vfs_inode); + return container_of(inode, struct befs_inode_info, vfs_inode); } static inline befs_blocknr_t diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index cd46e4158830..6b659967898e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1530,7 +1530,7 @@ static int fill_files_note(struct memelfnote *note) file = vma->vm_file; if (!file) continue; - filename = d_path(&file->f_path, name_curpos, remaining); + filename = file_path(file, name_curpos, remaining); if (IS_ERR(filename)) { if (PTR_ERR(filename) == -ENAMETOOLONG) { vfree(data); @@ -1540,7 +1540,7 @@ static int fill_files_note(struct memelfnote *note) continue; } - /* d_path() fills at the end, move name down */ + /* file_path() fills at the end, move name down */ /* n = strlen(filename) + 1: */ n = (name_curpos + remaining) - filename; remaining = filename - name_curpos; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4fe10f93db8a..198243717da5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -152,6 +152,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + if (IS_DAX(inode)) + return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, + NULL, DIO_SKIP_DIO_COUNT); return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, blkdev_get_block, NULL, NULL, DIO_SKIP_DIO_COUNT); @@ -443,6 +446,12 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector, long avail; const struct block_device_operations *ops = bdev->bd_disk->fops; + /* + * The device driver is allowed to sleep, in order to make the + * memory directly accessible. + */ + might_sleep(); + if (size < 0) return size; if (!ops->direct_access) @@ -1170,6 +1179,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; if (!partno) { ret = -ENXIO; bdev->bd_part = disk_get_part(disk, partno); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae2..81220b2203c6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,8 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 +/* DIO is ready to submit */ +#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80a9aefb0c46..aac314e14188 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1778,6 +1778,7 @@ struct btrfs_fs_info { spinlock_t unused_bgs_lock; struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; + struct mutex delete_unused_bgs_mutex; /* For btrfs to record security options */ struct security_mnt_opts security_opts; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f43bfea3684..a9aadb2ad525 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; int again; + struct btrfs_trans_handle *trans; do { again = 0; @@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); - btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg) * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); + + /* + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * with relocation (btrfs_relocate_chunk) and relocation + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) + * after acquiring fs_info->delete_unused_bgs_mutex. So we + * can't hold, nor need to, fs_info->cleaner_mutex when deleting + * unused block groups. + */ + btrfs_delete_unused_bgs(root->fs_info); sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); @@ -1789,6 +1799,34 @@ sleep: __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); + + /* + * Transaction kthread is stopped before us and wakes us up. + * However we might have started a new transaction and COWed some + * tree blocks when deleting unused block groups for example. So + * make sure we commit the transaction we started to have a clean + * shutdown when evicting the btree inode - if it has dirty pages + * when we do the final iput() on it, eviction will trigger a + * writeback for it which will fail with null pointer dereferences + * since work queues and other resources were already released and + * destroyed by the time the iput/eviction/writeback is made. + */ + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + btrfs_err(root->fs_info, + "cleaner transaction attach returned %ld", + PTR_ERR(trans)); + } else { + int ret; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + btrfs_err(root->fs_info, + "cleaner open transaction commit returned %d", + ret); + } + return 0; } @@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); + mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 38b76cc02f48..1c2bd1723e40 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_unlock(&fs_info->unused_bgs_lock); + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); @@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 795d754327a7..b823fac91c92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, } current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a637..d4a582ac3f73 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); - kfree(info); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); + kmem_cache_free(btrfs_free_space_cachep, info); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 855935f6671a..b33c0cf02668 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode) /* * Keep looping until we have no more ranges in the io tree. * We can have ongoing bios started by readpages (called from readahead) - * that didn't get their end io callbacks called yet or they are still - * in progress ((extent_io.c:end_bio_extent_readpage()). This means some + * that have their endio callback (extent_io.c:end_bio_extent_readpage) + * still in progress (unlocked the pages in the bio but did not yet + * unlocked the ranges in the io tree). Therefore this means some * ranges can still be locked and eviction started because before * submitting those bios, which are executed by a separate task (work * queue kthread), inode references (inode->i_count) were not taken @@ -7546,6 +7547,7 @@ unlock: current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); + set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); } /* @@ -7871,8 +7873,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct bio *dio_bio; int ret; - if (err) - goto out_done; again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, @@ -7895,7 +7895,6 @@ out_test: ordered = NULL; goto again; } -out_done: dio_bio = dip->dio_bio; kfree(dip); @@ -8163,9 +8162,8 @@ out_err: static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct inode *inode, loff_t file_offset) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_dio_private *dip; - struct bio *io_bio; + struct btrfs_dio_private *dip = NULL; + struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; int write = rw & REQ_WRITE; @@ -8182,7 +8180,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip = kzalloc(sizeof(*dip), GFP_NOFS); if (!dip) { ret = -ENOMEM; - goto free_io_bio; + goto free_ordered; } dip->private = dio_bio->bi_private; @@ -8210,25 +8208,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, if (btrfs_bio->end_io) btrfs_bio->end_io(btrfs_bio, ret); -free_io_bio: - bio_put(io_bio); free_ordered: /* - * If this is a write, we need to clean up the reserved space and kill - * the ordered extent. + * If we arrived here it means either we failed to submit the dip + * or we either failed to clone the dio_bio or failed to allocate the + * dip. If we cloned the dio_bio and allocated the dip, we can just + * call bio_endio against our io_bio so that we get proper resource + * cleanup if we fail to submit the dip, otherwise, we must do the + * same as btrfs_endio_direct_[write|read] because we can't call these + * callbacks - they require an allocated dip and a clone of dio_bio. */ - if (write) { - struct btrfs_ordered_extent *ordered; - ordered = btrfs_lookup_ordered_extent(inode, file_offset); - if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) - btrfs_free_reserved_extent(root, ordered->start, - ordered->disk_len, 1); - btrfs_put_ordered_extent(ordered); - btrfs_put_ordered_extent(ordered); + if (io_bio && dip) { + bio_endio(io_bio, ret); + /* + * The end io callbacks free our dip, do the final put on io_bio + * and all the cleanup and final put for dio_bio (through + * dio_end_io()). + */ + dip = NULL; + io_bio = NULL; + } else { + if (write) { + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(inode, + file_offset); + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + /* + * Decrements our ref on the ordered extent and removes + * the ordered extent from the inode's ordered tree, + * doing all the proper resource cleanup such as for the + * reserved space and waking up any waiters for this + * ordered extent (through btrfs_remove_ordered_extent). + */ + btrfs_finish_ordered_io(ordered); + } else { + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, + file_offset + dio_bio->bi_iter.bi_size - 1); + } + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + /* + * Releases and cleans up our dio_bio, no need to bio_put() + * nor bio_endio()/bio_io_error() against dio_bio. + */ + dio_end_io(dio_bio, ret); } - bio_endio(dio_bio, ret); + if (io_bio) + bio_put(io_bio); + kfree(dip); } static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, @@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; - if (ret < 0 && ret != -EIOCBQUEUED) - btrfs_delalloc_release_space(inode, count); - else if (ret >= 0 && (size_t)ret < count) + if (ret < 0 && ret != -EIOCBQUEUED) { + /* + * If the error comes from submitting stage, + * btrfs_get_blocsk_direct() has free'd data space, + * and metadata space will be handled by + * finish_ordered_fn, don't do that again to make + * sure bytes_may_use is correct. + */ + if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, + &BTRFS_I(inode)->runtime_flags)) + btrfs_delalloc_release_space(inode, count); + } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c86b835da7a8..5d91776e12a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 { static int btrfs_clone(struct inode *src, struct inode *inode, - u64 off, u64 olen, u64 olen_aligned, u64 destoff); + u64 off, u64 olen, u64 olen_aligned, u64 destoff, + int no_time_update); /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -2765,14 +2766,11 @@ out: return ret; } -static struct page *extent_same_get_page(struct inode *inode, u64 off) +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - pgoff_t index; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - index = off >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, index); if (!page) return NULL; @@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) return page; } +static int gather_extent_pages(struct inode *inode, struct page **pages, + int num_pages, u64 off) +{ + int i; + pgoff_t index = off >> PAGE_CACHE_SHIFT; + + for (i = 0; i < num_pages; i++) { + pages[i] = extent_same_get_page(inode, index + i); + if (!pages[i]) + return -ENOMEM; + } + return 0; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) { - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); - mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode2->i_mutex); } -static void btrfs_double_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 < inode2) + swap(inode1, inode2); + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + if (inode1 != inode2) + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); +} + +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) { if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); lock_extent_range(inode1, loff1, len); - if (inode1 != inode2) { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + if (inode1 != inode2) lock_extent_range(inode2, loff2, len); +} + +struct cmp_pages { + int num_pages; + struct page **src_pages; + struct page **dst_pages; +}; + +static void btrfs_cmp_data_free(struct cmp_pages *cmp) +{ + int i; + struct page *pg; + + for (i = 0; i < cmp->num_pages; i++) { + pg = cmp->src_pages[i]; + if (pg) + page_cache_release(pg); + pg = cmp->dst_pages[i]; + if (pg) + page_cache_release(pg); + } + kfree(cmp->src_pages); + kfree(cmp->dst_pages); +} + +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, + struct inode *dst, u64 dst_loff, + u64 len, struct cmp_pages *cmp) +{ + int ret; + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + struct page **src_pgarr, **dst_pgarr; + + /* + * We must gather up all the pages before we initiate our + * extent locking. We use an array for the page pointers. Size + * of the array is bounded by len, which is in turn bounded by + * BTRFS_MAX_DEDUPE_LEN. + */ + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + if (!src_pgarr || !dst_pgarr) { + kfree(src_pgarr); + kfree(dst_pgarr); + return -ENOMEM; } + cmp->num_pages = num_pages; + cmp->src_pages = src_pgarr; + cmp->dst_pages = dst_pgarr; + + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + if (ret) + goto out; + + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + +out: + if (ret) + btrfs_cmp_data_free(cmp); + return 0; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len) + u64 dst_loff, u64 len, struct cmp_pages *cmp) { int ret = 0; + int i; struct page *src_page, *dst_page; unsigned int cmp_len = PAGE_CACHE_SIZE; void *addr, *dst_addr; + i = 0; while (len) { if (len < PAGE_CACHE_SIZE) cmp_len = len; - src_page = extent_same_get_page(src, loff); - if (!src_page) - return -EINVAL; - dst_page = extent_same_get_page(dst, dst_loff); - if (!dst_page) { - page_cache_release(src_page); - return -EINVAL; - } + BUG_ON(i >= cmp->num_pages); + + src_page = cmp->src_pages[i]; + dst_page = cmp->dst_pages[i]; + addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, kunmap_atomic(addr); kunmap_atomic(dst_addr); - page_cache_release(src_page); - page_cache_release(dst_page); if (ret) break; - loff += cmp_len; - dst_loff += cmp_len; len -= cmp_len; + i++; } return ret; @@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 len = olen; + struct cmp_pages cmp; + int same_inode = 0; + u64 same_lock_start = 0; + u64 same_lock_len = 0; - /* - * btrfs_clone() can't handle extents in the same file - * yet. Once that works, we can drop this check and replace it - * with a check for the same inode, but overlapping extents. - */ if (src == dst) - return -EINVAL; + same_inode = 1; if (len == 0) return 0; - btrfs_double_lock(src, loff, dst, dst_loff, len); + if (same_inode) { + mutex_lock(&src->i_mutex); - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); - if (ret) - goto out_unlock; + /* + * Single inode case wants the same checks, except we + * don't want our length pushed out past i_size as + * comparing that data range makes no sense. + * + * extent_same_check_offsets() will do this for an + * unaligned length at i_size, so catch it here and + * reject the request. + * + * This effectively means we require aligned extents + * for the single-inode case, whereas the other cases + * allow an unaligned length so long as it ends at + * i_size. + */ + if (len != olen) { + ret = -EINVAL; + goto out_unlock; + } + + /* Check for overlapping ranges */ + if (dst_loff + len > loff && dst_loff < loff + len) { + ret = -EINVAL; + goto out_unlock; + } + + same_lock_start = min_t(u64, loff, dst_loff); + same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + btrfs_double_inode_lock(src, dst); + + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); + if (ret) + goto out_unlock; + } /* don't make the dst file partly checksummed */ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != @@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } - ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); + if (ret) + goto out_unlock; + + if (same_inode) + lock_extent_range(src, same_lock_start, same_lock_len); + else + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + + /* pass original length for comparison so we stay within i_size */ + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); + + if (same_inode) + unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, + same_lock_start + same_lock_len - 1); + else + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_unlock(src, loff, dst, dst_loff, len); + if (same_inode) + mutex_unlock(&src->i_mutex); + else + btrfs_double_inode_unlock(src, dst); return ret; } @@ -3100,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, const u64 destoff, - const u64 olen) + const u64 olen, + int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (!no_time_update) + inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3191,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode, * @inode: Inode to clone to * @off: Offset within source to start clone from * @olen: Original length, passed by user, of range to clone - * @olen_aligned: Block-aligned value of olen, extent_same uses - * identical values here + * @olen_aligned: Block-aligned value of olen * @destoff: Offset within @inode to start clone + * @no_time_update: Whether to update mtime/ctime on the target inode */ static int btrfs_clone(struct inode *src, struct inode *inode, const u64 off, const u64 olen, const u64 olen_aligned, - const u64 destoff) + const u64 destoff, int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path = NULL; @@ -3521,7 +3655,8 @@ process_slot: root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, - destoff, olen); + destoff, olen, + no_time_update); if (ret) goto out; if (new_key.offset + datal >= destoff + len) @@ -3559,7 +3694,7 @@ process_slot: clone_update_extent_map(inode, trans, NULL, last_dest_end, destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, - destoff, olen); + destoff, olen, no_time_update); } out: @@ -3696,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, lock_extent_range(inode, destoff, len); } - ret = btrfs_clone(src, inode, off, olen, len, destoff); + ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); if (same_inode) { u64 lock_start = min_t(u64, off, destoff); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 89656d799ff6..52170cf1757e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { + ASSERT(list_empty(&entry->log_list)); + ASSERT(list_empty(&entry->trans_list)); + ASSERT(list_empty(&entry->root_extent_list)); + ASSERT(RB_EMPTY_NODE(&entry->rb_node)); if (entry->inode) btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { @@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); + RB_CLEAR_NODE(node); if (tree->last == node) tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d5f1f033b7a0..e9ace099162c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; + /* Sometimes we would want to clear the limit on this qgroup. + * To meet this requirement, we treat the -1 as a special value + * which tell kernel to clear the limit on this qgroup. + */ + const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; @@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, } spin_lock(&fs_info->qgroup_lock); - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) - qgroup->max_rfer = limit->max_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) - qgroup->max_excl = limit->max_excl; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) - qgroup->rsv_rfer = limit->rsv_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) - qgroup->rsv_excl = limit->rsv_excl; + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { + if (limit->max_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + qgroup->max_rfer = 0; + } else { + qgroup->max_rfer = limit->max_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { + if (limit->max_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + qgroup->max_excl = 0; + } else { + qgroup->max_excl = limit->max_excl; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { + if (limit->rsv_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + qgroup->rsv_rfer = 0; + } else { + qgroup->rsv_rfer = limit->rsv_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { + if (limit->rsv_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + qgroup->rsv_excl = 0; + } else { + qgroup->rsv_excl = limit->rsv_excl; + } + } qgroup->lim_flags |= limit->flags; spin_unlock(&fs_info->qgroup_lock); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 827951fbf7fc..88cbb5995667 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4049,7 +4049,7 @@ restart: if (trans && progress && err == -ENOSPC) { ret = btrfs_force_chunk_alloc(trans, rc->extent_root, rc->block_group->flags); - if (ret == 0) { + if (ret == 1) { err = 0; progress = 0; goto restart; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9f2feabe99f2..94db0fa5225a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, int is_dev_replace) { - int ret = 0; unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; int max_active = fs_info->thread_pool_size; @@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, fs_info->scrub_workers = btrfs_alloc_workqueue("btrfs-scrub", flags, max_active, 4); - if (!fs_info->scrub_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_workers) + goto fail_scrub_workers; + fs_info->scrub_wr_completion_workers = btrfs_alloc_workqueue("btrfs-scrubwrc", flags, max_active, 2); - if (!fs_info->scrub_wr_completion_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_wr_completion_workers) + goto fail_scrub_wr_completion_workers; + fs_info->scrub_nocow_workers = btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_nocow_workers) + goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); - if (!fs_info->scrub_parity_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_parity_workers) + goto fail_scrub_parity_workers; } ++fs_info->scrub_workers_refcnt; -out: - return ret; + return 0; + +fail_scrub_parity_workers: + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); +fail_scrub_nocow_workers: + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); +fail_scrub_wr_completion_workers: + btrfs_destroy_workqueue(fs_info->scrub_workers); +fail_scrub_workers: + return -ENOMEM; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, return 0; } +/* + * At the moment we always log all xattrs. This is to figure out at log replay + * time which xattrs must have their deletion replayed. If a xattr is missing + * in the log tree and exists in the fs/subvol tree, we delete it. This is + * because if a xattr is deleted, the inode is fsynced and a power failure + * happens, causing the log to be replayed the next time the fs is mounted, + * we want the xattr to not exist anymore (same behaviour as other filesystems + * with a journal, ext3/4, xfs, f2fs, etc). + */ +static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + int ret; + struct btrfs_key key; + const u64 ino = btrfs_ino(inode); + int ins_nr = 0; + int start_slot = 0; + + key.objectid = ino; + key.type = BTRFS_XATTR_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + + while (true) { + int slot = path->slots[0]; + struct extent_buffer *leaf = path->nodes[0]; + int nritems = btrfs_header_nritems(leaf); + + if (slot >= nritems) { + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) + break; + + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + cond_resched(); + } + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + } + + return 0; +} + +/* + * If the no holes feature is enabled we need to make sure any hole between the + * last extent and the i_size of our inode is explicitly marked in the log. This + * is to make sure that doing something like: + * + * 1) create file with 128Kb of data + * 2) truncate file to 64Kb + * 3) truncate file to 256Kb + * 4) fsync file + * 5) <crash/power failure> + * 6) mount fs and trigger log replay + * + * Will give us a file with a size of 256Kb, the first 64Kb of data match what + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the + * file correspond to a hole. The presence of explicit holes in a log tree is + * what guarantees that log replay will remove/adjust file extent items in the + * fs/subvol tree. + * + * Here we do not need to care about holes between extents, that is already done + * by copy_items(). We also only need to do this in the full sync path, where we + * lookup for extents from the fs/subvol tree only. In the fast path case, we + * lookup the list of modified extent maps and if any represents a hole, we + * insert a corresponding extent representing a hole in the log tree. + */ +static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + u64 hole_start; + u64 hole_size; + struct extent_buffer *leaf; + struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(inode); + + if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ASSERT(ret != 0); + if (ret < 0) + return ret; + + ASSERT(path->slots[0] > 0); + path->slots[0]--; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { + /* inode does not have any extents */ + hole_start = 0; + hole_size = i_size; + } else { + struct btrfs_file_extent_item *extent; + u64 len; + + /* + * If there's an extent beyond i_size, an explicit hole was + * already inserted by copy_items(). + */ + if (key.offset >= i_size) + return 0; + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, extent) == + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_inline_len(leaf, + path->slots[0], + extent); + ASSERT(len == i_size); + return 0; + } + + len = btrfs_file_extent_num_bytes(leaf, extent); + /* Last extent goes beyond i_size, no need to log a hole. */ + if (key.offset + len > i_size) + return 0; + hole_start = key.offset + len; + hole_size = i_size - hole_start; + } + btrfs_release_path(path); + + /* Last extent ends at i_size. */ + if (hole_size == 0) + return 0; + + hole_size = ALIGN(hole_size, root->sectorsize); + ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, + hole_size, 0, hole_size, 0, 0, 0); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4290,6 +4467,28 @@ again: if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ + if (min_key.type == BTRFS_XATTR_ITEM_KEY) { + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + if (ret) { + btrfs_release_path(path); + continue; + } + goto next_slot; + } + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4357,9 +4556,26 @@ next_slot: ins_nr = 0; } + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + if (err) + goto out_unlock; + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_trailing_hole(trans, root, inode, path); + if (err) + goto out_unlock; + } log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4b438b4c8c91..fbe7c104531c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; + /* + * Prevent races with automatic removal of unused block groups. + * After we relocate and before we remove the chunk with offset + * chunk_offset, automatic removal of the block group can kick in, + * resulting in a failure when calling btrfs_remove_chunk() below. + * + * Make sure to acquire this mutex before doing a tree search (dev + * or chunk trees) to find chunks. Otherwise the cleaner kthread might + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after + * we release the path used to search the chunk/dev tree and before + * the current task acquires this mutex and calls us. + */ + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); + ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) return -ENOSPC; @@ -2814,13 +2828,18 @@ again: key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto error; + } BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto error; if (ret > 0) @@ -2843,6 +2862,7 @@ again: else BUG_ON(ret); } + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (found_key.offset == 0) break; @@ -3299,9 +3319,12 @@ again: goto error; } + mutex_lock(&fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } /* * this shouldn't happen, it means the last relocate @@ -3313,6 +3336,7 @@ again: ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); ret = 0; break; } @@ -3321,8 +3345,10 @@ again: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != key.objectid) + if (found_key.objectid != key.objectid) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); break; + } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -3335,10 +3361,13 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); btrfs_release_path(path); - if (!ret) + if (!ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; + } if (counting) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); @@ -3348,6 +3377,7 @@ again: ret = btrfs_relocate_chunk(chunk_root, found_key.objectid, found_key.offset); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto error; if (ret == -ENOSPC) { @@ -4087,11 +4117,16 @@ again: key.type = BTRFS_DEV_EXTENT_KEY; do { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto done; + } ret = btrfs_previous_item(root, path, 0, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto done; if (ret) { @@ -4105,6 +4140,7 @@ again: btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != device->devid) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4113,6 +4149,7 @@ again: length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4122,6 +4159,7 @@ again: btrfs_release_path(path); ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto done; if (ret == -ENOSPC) @@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; - struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; + struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 8c52472d2efa..aecd0859eacb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -43,7 +43,6 @@ struct cachefiles_object { loff_t i_size; /* object size */ unsigned long flags; #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ -#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ atomic_t usage; /* object usage count */ uint8_t type; /* object type */ uint8_t new; /* T if object new */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ab857ab9f40d..fc1056f5c96a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -97,7 +97,8 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, * call vfs_unlink(), vfs_rmdir() or vfs_rename() */ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, - struct dentry *dentry) + struct dentry *dentry, + enum fscache_why_object_killed why) { struct cachefiles_object *object; struct rb_node *p; @@ -132,8 +133,9 @@ found_dentry: pr_err("\n"); pr_err("Error: Can't preemptively bury live object\n"); cachefiles_printk_object(object, NULL); - } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { - pr_err("Error: Object already preemptively buried\n"); + } else { + if (why != FSCACHE_OBJECT_IS_STALE) + fscache_object_mark_killed(&object->fscache, why); } write_unlock(&cache->active_lock); @@ -265,7 +267,8 @@ requeue: static int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *dir, struct dentry *rep, - bool preemptive) + bool preemptive, + enum fscache_why_object_killed why) { struct dentry *grave, *trap; struct path path, path_to_graveyard; @@ -289,7 +292,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, ret = vfs_unlink(d_inode(dir), rep, NULL); if (preemptive) - cachefiles_mark_object_buried(cache, rep); + cachefiles_mark_object_buried(cache, rep, why); } mutex_unlock(&d_inode(dir)->i_mutex); @@ -394,7 +397,7 @@ try_again: "Rename failed with error %d", ret); if (preemptive) - cachefiles_mark_object_buried(cache, rep); + cachefiles_mark_object_buried(cache, rep, why); } unlock_rename(cache->graveyard, dir); @@ -422,7 +425,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); - if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) { /* object allocation for the same key preemptively deleted this * object's file so that it could create its own file */ _debug("object preemptively buried"); @@ -433,7 +436,8 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, * may have been renamed */ if (dir == object->dentry->d_parent) { ret = cachefiles_bury_object(cache, dir, - object->dentry, false); + object->dentry, false, + FSCACHE_OBJECT_WAS_RETIRED); } else { /* it got moved, presumably by cachefilesd culling it, * so it's no longer in the key path and we can ignore @@ -522,7 +526,7 @@ lookup_again: if (d_is_negative(next)) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) - goto create_error; + goto no_space_error; path.dentry = dir; ret = security_path_mkdir(&path, next, 0); @@ -551,7 +555,7 @@ lookup_again: if (d_is_negative(next)) { ret = cachefiles_has_space(cache, 1, 0); if (ret < 0) - goto create_error; + goto no_space_error; path.dentry = dir; ret = security_path_mknod(&path, next, S_IFREG, 0); @@ -602,7 +606,8 @@ lookup_again: * mutex) */ object->dentry = NULL; - ret = cachefiles_bury_object(cache, dir, next, true); + ret = cachefiles_bury_object(cache, dir, next, true, + FSCACHE_OBJECT_IS_STALE); dput(next); next = NULL; @@ -610,6 +615,7 @@ lookup_again: goto delete_error; _debug("redo lookup"); + fscache_object_retrying_stale(&object->fscache); goto lookup_again; } } @@ -662,6 +668,8 @@ lookup_again: _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino); return 0; +no_space_error: + fscache_object_mark_killed(&object->fscache, FSCACHE_OBJECT_NO_SPACE); create_error: _debug("create error %d", ret); if (ret == -EIO) @@ -927,7 +935,8 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, /* actually remove the victim (drops the dir mutex) */ _debug("bury"); - ret = cachefiles_bury_object(cache, dir, victim, false); + ret = cachefiles_bury_object(cache, dir, victim, false, + FSCACHE_OBJECT_WAS_CULLED); if (ret < 0) goto error; diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 64fa248343f6..8f84646f10e9 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -187,10 +187,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, val_size2 = posix_acl_xattr_size(default_acl->a_count); err = -ENOMEM; - tmp_buf = kmalloc(max(val_size1, val_size2), GFP_NOFS); + tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_NOFS); + pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL); if (!pagelist) goto out_err; ceph_pagelist_init(pagelist); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e162bcd105ee..890c50971a69 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -87,17 +87,21 @@ static int ceph_set_page_dirty(struct page *page) inode = mapping->host; ci = ceph_inode(inode); - /* - * Note that we're grabbing a snapc ref here without holding - * any locks! - */ - snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); - /* dirty the head */ spin_lock(&ci->i_ceph_lock); - if (ci->i_head_snapc == NULL) - ci->i_head_snapc = ceph_get_snap_context(snapc); - ++ci->i_wrbuffer_ref_head; + BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + capsnap->dirty_pages++; + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + ++ci->i_wrbuffer_ref_head; + } if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; @@ -346,7 +350,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) /* build page vector */ nr_pages = calc_pages_for(0, len); - pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS); + pages = kmalloc(sizeof(*pages) * nr_pages, GFP_KERNEL); ret = -ENOMEM; if (!pages) goto out; @@ -358,7 +362,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) dout("start_read %p adding %p idx %lu\n", inode, page, page->index); if (add_to_page_cache_lru(page, &inode->i_data, page->index, - GFP_NOFS)) { + GFP_KERNEL)) { ceph_fscache_uncache_page(inode, page); page_cache_release(page); dout("start_read %p add_to_page_cache failed %p\n", @@ -436,7 +440,7 @@ out: * only snap context we are allowed to write back. */ static struct ceph_snap_context *get_oldest_context(struct inode *inode, - u64 *snap_size) + loff_t *snap_size) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc = NULL; @@ -476,8 +480,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_osd_client *osdc; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); + loff_t snap_size = -1; long writeback_stat; - u64 truncate_size, snap_size = 0; + u64 truncate_size; u32 truncate_seq; int err = 0, len = PAGE_CACHE_SIZE; @@ -512,7 +517,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) + if (snap_size == -1) snap_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); @@ -695,7 +700,8 @@ static int ceph_writepages_start(struct address_space *mapping, unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; int do_sync = 0; - u64 truncate_size, snap_size; + loff_t snap_size, i_size; + u64 truncate_size; u32 truncate_seq; /* @@ -741,7 +747,7 @@ static int ceph_writepages_start(struct address_space *mapping, retry: /* find oldest snap context with dirty data */ ceph_put_snap_context(snapc); - snap_size = 0; + snap_size = -1; snapc = get_oldest_context(inode, &snap_size); if (!snapc) { /* hmm, why does writepages get called when there @@ -749,16 +755,13 @@ retry: dout(" no snap context with dirty data?\n"); goto out; } - if (snap_size == 0) - snap_size = i_size_read(inode); dout(" oldest snapc is %p seq %lld (%d snaps)\n", snapc, snapc->seq, snapc->num_snaps); spin_lock(&ci->i_ceph_lock); truncate_seq = ci->i_truncate_seq; truncate_size = ci->i_truncate_size; - if (!snap_size) - snap_size = i_size_read(inode); + i_size = i_size_read(inode); spin_unlock(&ci->i_ceph_lock); if (last_snapc && snapc != last_snapc) { @@ -828,8 +831,10 @@ get_more_pages: dout("waiting on writeback %p\n", page); wait_on_page_writeback(page); } - if (page_offset(page) >= snap_size) { - dout("%p page eof %llu\n", page, snap_size); + if (page_offset(page) >= + (snap_size == -1 ? i_size : snap_size)) { + dout("%p page eof %llu\n", page, + (snap_size == -1 ? i_size : snap_size)); done = 1; unlock_page(page); break; @@ -884,7 +889,8 @@ get_more_pages: } if (do_sync) - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, + CEPH_OSD_OP_STARTSYNC, 0); req->r_callback = writepages_finish; req->r_inode = inode; @@ -944,10 +950,18 @@ get_more_pages: } /* Format the osd request message and submit the write */ - offset = page_offset(pages[0]); - len = min(snap_size - offset, - (u64)locked_pages << PAGE_CACHE_SHIFT); + len = (u64)locked_pages << PAGE_CACHE_SHIFT; + if (snap_size == -1) { + len = min(len, (u64)i_size_read(inode) - offset); + /* writepages_finish() clears writeback pages + * according to the data length, so make sure + * data length covers all locked pages */ + len = max(len, 1 + + ((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT)); + } else { + len = min(len, snap_size - offset); + } dout("writepages got %d pages at %llu~%llu\n", locked_pages, offset, len); @@ -1032,7 +1046,6 @@ static int ceph_update_writeable_page(struct file *file, { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; loff_t page_off = pos & PAGE_CACHE_MASK; int pos_in_page = pos & ~PAGE_CACHE_MASK; int end_in_page = pos_in_page + len; @@ -1044,10 +1057,6 @@ retry_locked: /* writepages currently holds page lock, but if we change that later, */ wait_on_page_writeback(page); - /* check snap context */ - BUG_ON(!ci->i_snap_realm); - down_read(&mdsc->snap_rwsem); - BUG_ON(!ci->i_snap_realm->cached_context); snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* @@ -1055,7 +1064,6 @@ retry_locked: * context! is it writeable now? */ oldest = get_oldest_context(inode, NULL); - up_read(&mdsc->snap_rwsem); if (snapc->seq > oldest->seq) { ceph_put_snap_context(oldest); @@ -1112,7 +1120,6 @@ retry_locked: } /* we need to read it. */ - up_read(&mdsc->snap_rwsem); r = readpage_nounlock(file, page); if (r < 0) goto fail_nosnap; @@ -1157,16 +1164,13 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, /* * we don't do anything in here that simple_write_end doesn't do - * except adjust dirty page accounting and drop read lock on - * mdsc->snap_rwsem. + * except adjust dirty page accounting */ static int ceph_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; @@ -1188,7 +1192,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); unlock_page(page); - up_read(&mdsc->snap_rwsem); page_cache_release(page); if (check_cap) @@ -1314,13 +1317,17 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_cap_flush *prealloc_cf; struct page *page = vmf->page; loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; int want, got, ret; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return VM_FAULT_SIGBUS; + if (ci->i_inline_version != CEPH_INLINE_NONE) { struct page *locked_page = NULL; if (off == 0) { @@ -1330,8 +1337,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) - return VM_FAULT_SIGBUS; + if (ret < 0) { + ret = VM_FAULT_SIGBUS; + goto out_free; + } } if (off + PAGE_CACHE_SIZE <= size) @@ -1353,7 +1362,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) break; if (ret != -ERESTARTSYS) { WARN_ON(1); - return VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; + goto out_free; } } dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1373,7 +1383,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret == 0) { /* success. we'll keep the page locked. */ set_page_dirty(page); - up_read(&mdsc->snap_rwsem); ret = VM_FAULT_LOCKED; } else { if (ret == -ENOMEM) @@ -1389,7 +1398,8 @@ out: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1398,6 +1408,8 @@ out: dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); +out_free: + ceph_free_cap_flush(prealloc_cf); return ret; } @@ -1509,8 +1521,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 0, 1, CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, - 0, 0, false); + ceph_empty_snapc, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); goto out; @@ -1528,7 +1539,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_vino(inode), 0, &len, 1, 3, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - ci->i_snap_realm->cached_context, + ceph_empty_snapc, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -1597,3 +1608,206 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &ceph_vmops; return 0; } + +enum { + POOL_READ = 1, + POOL_WRITE = 2, +}; + +static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool) +{ + struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; + struct rb_node **p, *parent; + struct ceph_pool_perm *perm; + struct page **pages; + int err = 0, err2 = 0, have = 0; + + down_read(&mdsc->pool_perm_rwsem); + p = &mdsc->pool_perm_tree.rb_node; + while (*p) { + perm = rb_entry(*p, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + up_read(&mdsc->pool_perm_rwsem); + if (*p) + goto out; + + dout("__ceph_pool_perm_get pool %u no perm cached\n", pool); + + down_write(&mdsc->pool_perm_rwsem); + parent = NULL; + while (*p) { + parent = *p; + perm = rb_entry(parent, struct ceph_pool_perm, node); + if (pool < perm->pool) + p = &(*p)->rb_left; + else if (pool > perm->pool) + p = &(*p)->rb_right; + else { + have = perm->perm; + break; + } + } + if (*p) { + up_write(&mdsc->pool_perm_rwsem); + goto out; + } + + rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ceph_empty_snapc, + 1, false, GFP_NOFS); + if (!rd_req) { + err = -ENOMEM; + goto out_unlock; + } + + rd_req->r_flags = CEPH_OSD_FLAG_READ; + osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0); + rd_req->r_base_oloc.pool = pool; + snprintf(rd_req->r_base_oid.name, sizeof(rd_req->r_base_oid.name), + "%llx.00000000", ci->i_vino.ino); + rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name); + + wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, + ceph_empty_snapc, + 1, false, GFP_NOFS); + if (!wr_req) { + err = -ENOMEM; + goto out_unlock; + } + + wr_req->r_flags = CEPH_OSD_FLAG_WRITE | + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; + osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); + wr_req->r_base_oloc.pool = pool; + wr_req->r_base_oid = rd_req->r_base_oid; + + /* one page should be large enough for STAT data */ + pages = ceph_alloc_page_vector(1, GFP_KERNEL); + if (IS_ERR(pages)) { + err = PTR_ERR(pages); + goto out_unlock; + } + + osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE, + 0, false, true); + ceph_osdc_build_request(rd_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); + + ceph_osdc_build_request(wr_req, 0, NULL, CEPH_NOSNAP, + &ci->vfs_inode.i_mtime); + err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); + + if (!err) + err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req); + if (!err2) + err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req); + + if (err >= 0 || err == -ENOENT) + have |= POOL_READ; + else if (err != -EPERM) + goto out_unlock; + + if (err2 == 0 || err2 == -EEXIST) + have |= POOL_WRITE; + else if (err2 != -EPERM) { + err = err2; + goto out_unlock; + } + + perm = kmalloc(sizeof(*perm), GFP_NOFS); + if (!perm) { + err = -ENOMEM; + goto out_unlock; + } + + perm->pool = pool; + perm->perm = have; + rb_link_node(&perm->node, parent, p); + rb_insert_color(&perm->node, &mdsc->pool_perm_tree); + err = 0; +out_unlock: + up_write(&mdsc->pool_perm_rwsem); + + if (rd_req) + ceph_osdc_put_request(rd_req); + if (wr_req) + ceph_osdc_put_request(wr_req); +out: + if (!err) + err = have; + dout("__ceph_pool_perm_get pool %u result = %d\n", pool, err); + return err; +} + +int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) +{ + u32 pool; + int ret, flags; + + if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), + NOPOOLPERM)) + return 0; + + spin_lock(&ci->i_ceph_lock); + flags = ci->i_ceph_flags; + pool = ceph_file_layout_pg_pool(ci->i_layout); + spin_unlock(&ci->i_ceph_lock); +check: + if (flags & CEPH_I_POOL_PERM) { + if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { + dout("ceph_pool_perm_check pool %u no read perm\n", + pool); + return -EPERM; + } + if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { + dout("ceph_pool_perm_check pool %u no write perm\n", + pool); + return -EPERM; + } + return 0; + } + + ret = __ceph_pool_perm_get(ci, pool); + if (ret < 0) + return ret; + + flags = CEPH_I_POOL_PERM; + if (ret & POOL_READ) + flags |= CEPH_I_POOL_RD; + if (ret & POOL_WRITE) + flags |= CEPH_I_POOL_WR; + + spin_lock(&ci->i_ceph_lock); + if (pool == ceph_file_layout_pg_pool(ci->i_layout)) { + ci->i_ceph_flags = flags; + } else { + pool = ceph_file_layout_pg_pool(ci->i_layout); + flags = ci->i_ceph_flags; + } + spin_unlock(&ci->i_ceph_lock); + goto check; +} + +void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc) +{ + struct ceph_pool_perm *perm; + struct rb_node *n; + + while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) { + n = rb_first(&mdsc->pool_perm_tree); + perm = rb_entry(n, struct ceph_pool_perm, node); + rb_erase(n, &mdsc->pool_perm_tree); + kfree(perm); + } +} diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index be5ea6af8366..dc10c9dd36c1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -833,7 +833,9 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) + if (ci->i_rdcache_ref || + (!S_ISDIR(ci->vfs_inode.i_mode) && /* ignore readdir cache */ + ci->vfs_inode.i_data.nrpages)) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; @@ -926,16 +928,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) /* remove from session list */ spin_lock(&session->s_cap_lock); - /* - * s_cap_reconnect is protected by s_cap_lock. no one changes - * s_cap_gen while session is in the reconnect state. - */ - if (queue_release && - (!session->s_cap_reconnect || - cap->cap_gen == session->s_cap_gen)) - __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, - cap->mseq, cap->issue_seq); - if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ dout("__ceph_remove_cap delaying %p removal from session %p\n", @@ -948,6 +940,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } /* protect backpointer with s_cap_lock: see iterate_session_caps */ cap->ci = NULL; + + /* + * s_cap_reconnect is protected by s_cap_lock. no one changes + * s_cap_gen while session is in the reconnect state. + */ + if (queue_release && + (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) { + cap->queue_release = 1; + if (removed) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + removed = 0; + } + } else { + cap->queue_release = 0; + } + cap->cap_ino = ci->i_vino.ino; + spin_unlock(&session->s_cap_lock); /* remove from inode list */ @@ -977,8 +988,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) static int send_cap_msg(struct ceph_mds_session *session, u64 ino, u64 cid, int op, int caps, int wanted, int dirty, - u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq, - u64 size, u64 max_size, + u32 seq, u64 flush_tid, u64 oldest_flush_tid, + u32 issue_seq, u32 mseq, u64 size, u64 max_size, struct timespec *mtime, struct timespec *atime, u64 time_warp_seq, kuid_t uid, kgid_t gid, umode_t mode, @@ -992,20 +1003,23 @@ static int send_cap_msg(struct ceph_mds_session *session, size_t extra_len; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" - " seq %u/%u mseq %u follows %lld size %llu/%llu" + " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op), cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted), ceph_cap_string(dirty), - seq, issue_seq, mseq, follows, size, max_size, + seq, issue_seq, flush_tid, oldest_flush_tid, + mseq, follows, size, max_size, xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); - /* flock buffer size + inline version + inline data size */ - extra_len = 4 + 8 + 4; + /* flock buffer size + inline version + inline data size + + * osd_epoch_barrier + oldest_flush_tid */ + extra_len = 4 + 8 + 4 + 4 + 8; msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len, GFP_NOFS, false); if (!msg) return -ENOMEM; + msg->hdr.version = cpu_to_le16(6); msg->hdr.tid = cpu_to_le64(flush_tid); fc = msg->front.iov_base; @@ -1041,6 +1055,10 @@ static int send_cap_msg(struct ceph_mds_session *session, ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE); /* inline data size */ ceph_encode_32(&p, 0); + /* osd_epoch_barrier */ + ceph_encode_32(&p, 0); + /* oldest_flush_tid */ + ceph_encode_64(&p, oldest_flush_tid); fc->xattr_version = cpu_to_le64(xattr_version); if (xattrs_buf) { @@ -1053,44 +1071,6 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } -void __queue_cap_release(struct ceph_mds_session *session, - u64 ino, u64 cap_id, u32 migrate_seq, - u32 issue_seq) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %llx release to mds%d msg %p (%d left)\n", - ino, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - le32_add_cpu(&head->num, 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ino); - item->cap_id = cpu_to_le64(cap_id); - item->migrate_seq = cpu_to_le32(migrate_seq); - item->seq = cpu_to_le32(issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } -} - /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_ceph_lock. @@ -1127,7 +1107,7 @@ void ceph_queue_caps_release(struct inode *inode) */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, int op, int used, int want, int retain, int flushing, - unsigned *pflush_tid) + u64 flush_tid, u64 oldest_flush_tid) __releases(cap->ci->i_ceph_lock) { struct ceph_inode_info *ci = cap->ci; @@ -1145,8 +1125,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, u64 xattr_version = 0; struct ceph_buffer *xattr_blob = NULL; int delayed = 0; - u64 flush_tid = 0; - int i; int ret; bool inline_data; @@ -1190,26 +1168,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, cap->implemented &= cap->issued | used; cap->mds_wanted = want; - if (flushing) { - /* - * assign a tid for flush operations so we can avoid - * flush1 -> dirty1 -> flush2 -> flushack1 -> mark - * clean type races. track latest tid for every bit - * so we can handle flush AxFw, flush Fw, and have the - * first ack clean Ax. - */ - flush_tid = ++ci->i_cap_flush_last_tid; - if (pflush_tid) - *pflush_tid = flush_tid; - dout(" cap_flush_tid %d\n", (int)flush_tid); - for (i = 0; i < CEPH_CAP_BITS; i++) - if (flushing & (1 << i)) - ci->i_cap_flush_tid[i] = flush_tid; - - follows = ci->i_head_snapc->seq; - } else { - follows = 0; - } + follows = flushing ? ci->i_head_snapc->seq : 0; keep = cap->implemented; seq = cap->seq; @@ -1237,7 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, spin_unlock(&ci->i_ceph_lock); ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, - op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, + op, keep, want, flushing, seq, + flush_tid, oldest_flush_tid, issue_seq, mseq, size, max_size, &mtime, &atime, time_warp_seq, uid, gid, mode, xattr_version, xattr_blob, follows, inline_data); @@ -1259,14 +1219,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * - * Unless @again is true, skip cap_snaps that were already sent to + * Unless @kick is true, skip cap_snaps that were already sent to * the MDS (i.e., during this session). * * Called under i_ceph_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession, - int again) + int kick) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { @@ -1297,11 +1257,8 @@ retry: if (capsnap->dirty_pages || capsnap->writing) break; - /* - * if cap writeback already occurred, we should have dropped - * the capsnap in ceph_put_wrbuffer_cap_refs. - */ - BUG_ON(capsnap->dirty == 0); + /* should be removed by ceph_try_drop_cap_snap() */ + BUG_ON(!capsnap->need_flush); /* pick mds, take s_mutex */ if (ci->i_auth_cap == NULL) { @@ -1310,7 +1267,7 @@ retry: } /* only flush each capsnap once */ - if (!again && !list_empty(&capsnap->flushing_item)) { + if (!kick && !list_empty(&capsnap->flushing_item)) { dout("already flushed %p, skipping\n", capsnap); continue; } @@ -1320,6 +1277,9 @@ retry: if (session && session->s_mds != mds) { dout("oops, wrong session %p mutex\n", session); + if (kick) + goto out; + mutex_unlock(&session->s_mutex); ceph_put_mds_session(session); session = NULL; @@ -1343,20 +1303,22 @@ retry: goto retry; } - capsnap->flush_tid = ++ci->i_cap_flush_last_tid; + spin_lock(&mdsc->cap_dirty_lock); + capsnap->flush_tid = ++mdsc->last_cap_flush_tid; + spin_unlock(&mdsc->cap_dirty_lock); + atomic_inc(&capsnap->nref); - if (!list_empty(&capsnap->flushing_item)) - list_del_init(&capsnap->flushing_item); - list_add_tail(&capsnap->flushing_item, - &session->s_cap_snaps_flushing); + if (list_empty(&capsnap->flushing_item)) + list_add_tail(&capsnap->flushing_item, + &session->s_cap_snaps_flushing); spin_unlock(&ci->i_ceph_lock); dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, - capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, - capsnap->size, 0, + capsnap->dirty, 0, capsnap->flush_tid, 0, + 0, mseq, capsnap->size, 0, &capsnap->mtime, &capsnap->atime, capsnap->time_warp_seq, capsnap->uid, capsnap->gid, capsnap->mode, @@ -1396,7 +1358,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) * Caller is then responsible for calling __mark_inode_dirty with the * returned flags value. */ -int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) +int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; @@ -1416,9 +1379,14 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { - if (!ci->i_head_snapc) + WARN_ON_ONCE(ci->i_prealloc_cap_flush); + swap(ci->i_prealloc_cap_flush, *pcf); + + if (!ci->i_head_snapc) { + WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem)); ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); + } dout(" inode %p now dirty snapc %p auth cap %p\n", &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); @@ -1429,6 +1397,8 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) ihold(inode); dirty |= I_DIRTY_SYNC; } + } else { + WARN_ON_ONCE(!ci->i_prealloc_cap_flush); } BUG_ON(list_empty(&ci->i_dirty_item)); if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && @@ -1438,6 +1408,74 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) return dirty; } +static void __add_cap_flushing_to_inode(struct ceph_inode_info *ci, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &ci->i_cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, i_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->i_node, parent, p); + rb_insert_color(&cf->i_node, &ci->i_cap_flush_tree); +} + +static void __add_cap_flushing_to_mdsc(struct ceph_mds_client *mdsc, + struct ceph_cap_flush *cf) +{ + struct rb_node **p = &mdsc->cap_flush_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_cap_flush *other = NULL; + + while (*p) { + parent = *p; + other = rb_entry(parent, struct ceph_cap_flush, g_node); + + if (cf->tid < other->tid) + p = &(*p)->rb_left; + else if (cf->tid > other->tid) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&cf->g_node, parent, p); + rb_insert_color(&cf->g_node, &mdsc->cap_flush_tree); +} + +struct ceph_cap_flush *ceph_alloc_cap_flush(void) +{ + return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); +} + +void ceph_free_cap_flush(struct ceph_cap_flush *cf) +{ + if (cf) + kmem_cache_free(ceph_cap_flush_cachep, cf); +} + +static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc) +{ + struct rb_node *n = rb_first(&mdsc->cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, g_node); + return cf->tid; + } + return 0; +} + /* * Add dirty inode to the flushing list. Assigned a seq number so we * can wait for caps to flush without starving. @@ -1445,14 +1483,17 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) * Called under i_ceph_lock. */ static int __mark_caps_flushing(struct inode *inode, - struct ceph_mds_session *session) + struct ceph_mds_session *session, + u64 *flush_tid, u64 *oldest_flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap_flush *cf = NULL; int flushing; BUG_ON(ci->i_dirty_caps == 0); BUG_ON(list_empty(&ci->i_dirty_item)); + BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", @@ -1463,22 +1504,31 @@ static int __mark_caps_flushing(struct inode *inode, ci->i_dirty_caps = 0; dout(" inode %p now !dirty\n", inode); + swap(cf, ci->i_prealloc_cap_flush); + cf->caps = flushing; + cf->kick = false; + spin_lock(&mdsc->cap_dirty_lock); list_del_init(&ci->i_dirty_item); + cf->tid = ++mdsc->last_cap_flush_tid; + __add_cap_flushing_to_mdsc(mdsc, cf); + *oldest_flush_tid = __get_oldest_flush_tid(mdsc); + if (list_empty(&ci->i_flushing_item)) { - ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); mdsc->num_cap_flushing++; - dout(" inode %p now flushing seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing tid %llu\n", inode, cf->tid); } else { list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); - dout(" inode %p now flushing (more) seq %lld\n", inode, - ci->i_cap_flush_seq); + dout(" inode %p now flushing (more) tid %llu\n", + inode, cf->tid); } spin_unlock(&mdsc->cap_dirty_lock); + __add_cap_flushing_to_inode(ci, cf); + + *flush_tid = cf->tid; return flushing; } @@ -1524,6 +1574,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; + u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; @@ -1553,13 +1604,13 @@ retry: retry_locked: file_wanted = __ceph_caps_file_wanted(ci); used = __ceph_caps_used(ci); - want = file_wanted | used; issued = __ceph_caps_issued(ci, &implemented); revoking = implemented & ~issued; - retain = want | CEPH_CAP_PIN; + want = file_wanted; + retain = file_wanted | used | CEPH_CAP_PIN; if (!mdsc->stopping && inode->i_nlink > 0) { - if (want) { + if (file_wanted) { retain |= CEPH_CAP_ANY; /* be greedy */ } else if (S_ISDIR(inode->i_mode) && (issued & CEPH_CAP_FILE_SHARED) && @@ -1602,9 +1653,10 @@ retry_locked: * If we fail, it's because pages are locked.... try again later. */ if ((!is_delayed || mdsc->stopping) && - ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ - inode->i_data.nrpages && /* have cached pages */ - (file_wanted == 0 || /* no open files */ + !S_ISDIR(inode->i_mode) && /* ignore readdir cache */ + ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ + inode->i_data.nrpages && /* have cached pages */ + (file_wanted == 0 || /* no open files */ (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ !tried_invalidate) { @@ -1742,17 +1794,25 @@ ack: took_snap_rwsem = 1; } - if (cap == ci->i_auth_cap && ci->i_dirty_caps) - flushing = __mark_caps_flushing(inode, session); - else + if (cap == ci->i_auth_cap && ci->i_dirty_caps) { + flushing = __mark_caps_flushing(inode, session, + &flush_tid, + &oldest_flush_tid); + } else { flushing = 0; + flush_tid = 0; + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); + } mds = cap->mds; /* remember mds, so we don't repeat */ sent++; /* __send_cap drops i_ceph_lock */ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, - want, retain, flushing, NULL); + want, retain, flushing, + flush_tid, oldest_flush_tid); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -1781,12 +1841,13 @@ ack: /* * Try to flush dirty caps back to the auth mds. */ -static int try_flush_caps(struct inode *inode, unsigned *flush_tid) +static int try_flush_caps(struct inode *inode, u64 *ptid) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - int flushing = 0; struct ceph_mds_session *session = NULL; + int flushing = 0; + u64 flush_tid = 0, oldest_flush_tid = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1811,42 +1872,54 @@ retry: if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) goto out; - flushing = __mark_caps_flushing(inode, session); + flushing = __mark_caps_flushing(inode, session, &flush_tid, + &oldest_flush_tid); /* __send_cap drops i_ceph_lock */ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, - cap->issued | cap->implemented, flushing, - flush_tid); - if (!delayed) - goto out_unlocked; + (cap->issued | cap->implemented), + flushing, flush_tid, oldest_flush_tid); - spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); + if (delayed) { + spin_lock(&ci->i_ceph_lock); + __cap_delay_requeue(mdsc, ci); + spin_unlock(&ci->i_ceph_lock); + } + } else { + struct rb_node *n = rb_last(&ci->i_cap_flush_tree); + if (n) { + struct ceph_cap_flush *cf = + rb_entry(n, struct ceph_cap_flush, i_node); + flush_tid = cf->tid; + } + flushing = ci->i_flushing_caps; + spin_unlock(&ci->i_ceph_lock); } out: - spin_unlock(&ci->i_ceph_lock); -out_unlocked: if (session) mutex_unlock(&session->s_mutex); + + *ptid = flush_tid; return flushing; } /* * Return true if we've flushed caps through the given flush_tid. */ -static int caps_are_flushed(struct inode *inode, unsigned tid) +static int caps_are_flushed(struct inode *inode, u64 flush_tid) { struct ceph_inode_info *ci = ceph_inode(inode); - int i, ret = 1; + struct ceph_cap_flush *cf; + struct rb_node *n; + int ret = 1; spin_lock(&ci->i_ceph_lock); - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((ci->i_flushing_caps & (1 << i)) && - ci->i_cap_flush_tid[i] <= tid) { - /* still flushing this bit */ + n = rb_first(&ci->i_cap_flush_tree); + if (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid <= flush_tid) ret = 0; - break; - } + } spin_unlock(&ci->i_ceph_lock); return ret; } @@ -1864,13 +1937,16 @@ static void sync_write_wait(struct inode *inode) struct ceph_osd_request *req; u64 last_tid; + if (!S_ISREG(inode->i_mode)) + return; + spin_lock(&ci->i_unsafe_lock); if (list_empty(head)) goto out; /* set upper bound as _last_ entry in chain */ - req = list_entry(head->prev, struct ceph_osd_request, - r_unsafe_item); + req = list_last_entry(head, struct ceph_osd_request, + r_unsafe_item); last_tid = req->r_tid; do { @@ -1888,18 +1964,64 @@ static void sync_write_wait(struct inode *inode) */ if (list_empty(head)) break; - req = list_entry(head->next, struct ceph_osd_request, - r_unsafe_item); + req = list_first_entry(head, struct ceph_osd_request, + r_unsafe_item); } while (req->r_tid < last_tid); out: spin_unlock(&ci->i_unsafe_lock); } +/* + * wait for any uncommitted directory operations to commit. + */ +static int unsafe_dirop_wait(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct list_head *head = &ci->i_unsafe_dirops; + struct ceph_mds_request *req; + u64 last_tid; + int ret = 0; + + if (!S_ISDIR(inode->i_mode)) + return 0; + + spin_lock(&ci->i_unsafe_lock); + if (list_empty(head)) + goto out; + + req = list_last_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + last_tid = req->r_tid; + + do { + ceph_mdsc_get_request(req); + spin_unlock(&ci->i_unsafe_lock); + + dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n", + inode, req->r_tid, last_tid); + ret = !wait_for_completion_timeout(&req->r_safe_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (ret) + ret = -EIO; /* timed out */ + + ceph_mdsc_put_request(req); + + spin_lock(&ci->i_unsafe_lock); + if (ret || list_empty(head)) + break; + req = list_first_entry(head, struct ceph_mds_request, + r_unsafe_dir_item); + } while (req->r_tid < last_tid); +out: + spin_unlock(&ci->i_unsafe_lock); + return ret; +} + int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u64 flush_tid; int ret; int dirty; @@ -1908,25 +2030,30 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) - return ret; + goto out; + + if (datasync) + goto out; + mutex_lock(&inode->i_mutex); dirty = try_flush_caps(inode, &flush_tid); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + ret = unsafe_dirop_wait(inode); + /* * only wait on non-file metadata writeback (the mds * can recover size and mtime, so we don't need to * wait for that) */ - if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { - dout("fsync waiting for flush_tid %u\n", flush_tid); + if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { ret = wait_event_interruptible(ci->i_cap_wq, - caps_are_flushed(inode, flush_tid)); + caps_are_flushed(inode, flush_tid)); } - - dout("fsync %p%s done\n", inode, datasync ? " datasync" : ""); mutex_unlock(&inode->i_mutex); +out: + dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; } @@ -1939,7 +2066,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); - unsigned flush_tid; + u64 flush_tid; int err = 0; int dirty; int wait = wbc->sync_mode == WB_SYNC_ALL; @@ -1994,6 +2121,104 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, } } +static int __kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_inode_info *ci, + bool kick_all) +{ + struct inode *inode = &ci->vfs_inode; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + int delayed = 0; + u64 first_tid = 0; + u64 oldest_flush_tid; + + spin_lock(&mdsc->cap_dirty_lock); + oldest_flush_tid = __get_oldest_flush_tid(mdsc); + spin_unlock(&mdsc->cap_dirty_lock); + + while (true) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", inode, + cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + break; + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + if (cf->tid < first_tid) + continue; + if (kick_all || cf->kick) + break; + } + if (!n) { + spin_unlock(&ci->i_ceph_lock); + break; + } + + cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = false; + + first_tid = cf->tid + 1; + + dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, + cap, cf->tid, ceph_cap_string(cf->caps)); + delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + cf->caps, cf->tid, oldest_flush_tid); + } + return delayed; +} + +void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) +{ + struct ceph_inode_info *ci; + struct ceph_cap *cap; + struct ceph_cap_flush *cf; + struct rb_node *n; + + dout("early_kick_flushing_caps mds%d\n", session->s_mds); + list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + spin_lock(&ci->i_ceph_lock); + cap = ci->i_auth_cap; + if (!(cap && cap->session == session)) { + pr_err("%p auth cap %p not mds%d ???\n", + &ci->vfs_inode, cap, session->s_mds); + spin_unlock(&ci->i_ceph_lock); + continue; + } + + + /* + * if flushing caps were revoked, we re-send the cap flush + * in client reconnect stage. This guarantees MDS * processes + * the cap flush message before issuing the flushing caps to + * other client. + */ + if ((cap->issued & ci->i_flushing_caps) != + ci->i_flushing_caps) { + spin_unlock(&ci->i_ceph_lock); + if (!__kick_flushing_caps(mdsc, session, ci, true)) + continue; + spin_lock(&ci->i_ceph_lock); + } + + for (n = rb_first(&ci->i_cap_flush_tree); n; n = rb_next(n)) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + cf->kick = true; + } + + spin_unlock(&ci->i_ceph_lock); + } +} + void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { @@ -2003,28 +2228,10 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps mds%d\n", session->s_mds); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { - struct inode *inode = &ci->vfs_inode; - struct ceph_cap *cap; - int delayed = 0; - - spin_lock(&ci->i_ceph_lock); - cap = ci->i_auth_cap; - if (cap && cap->session == session) { - dout("kick_flushing_caps %p cap %p %s\n", inode, - cap, ceph_cap_string(ci->i_flushing_caps)); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); - if (delayed) { - spin_lock(&ci->i_ceph_lock); - __cap_delay_requeue(mdsc, ci); - spin_unlock(&ci->i_ceph_lock); - } - } else { - pr_err("%p auth cap %p not mds%d ???\n", inode, - cap, session->s_mds); + int delayed = __kick_flushing_caps(mdsc, session, ci, false); + if (delayed) { + spin_lock(&ci->i_ceph_lock); + __cap_delay_requeue(mdsc, ci); spin_unlock(&ci->i_ceph_lock); } } @@ -2036,26 +2243,25 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; - int delayed = 0; spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; - dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, - ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + dout("kick_flushing_inode_caps %p flushing %s\n", inode, + ceph_cap_string(ci->i_flushing_caps)); __ceph_flush_snaps(ci, &session, 1); if (ci->i_flushing_caps) { + int delayed; + spin_lock(&mdsc->cap_dirty_lock); list_move_tail(&ci->i_flushing_item, &cap->session->s_cap_flushing); spin_unlock(&mdsc->cap_dirty_lock); - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - __ceph_caps_used(ci), - __ceph_caps_wanted(ci), - cap->issued | cap->implemented, - ci->i_flushing_caps, NULL); + spin_unlock(&ci->i_ceph_lock); + + delayed = __kick_flushing_caps(mdsc, session, ci, true); if (delayed) { spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(mdsc, ci); @@ -2073,7 +2279,8 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, * * Protected by i_ceph_lock. */ -static void __take_cap_refs(struct ceph_inode_info *ci, int got) +static void __take_cap_refs(struct ceph_inode_info *ci, int got, + bool snap_rwsem_locked) { if (got & CEPH_CAP_PIN) ci->i_pin_ref++; @@ -2081,8 +2288,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) ci->i_rd_ref++; if (got & CEPH_CAP_FILE_CACHE) ci->i_rdcache_ref++; - if (got & CEPH_CAP_FILE_WR) + if (got & CEPH_CAP_FILE_WR) { + if (ci->i_wr_ref == 0 && !ci->i_head_snapc) { + BUG_ON(!snap_rwsem_locked); + ci->i_head_snapc = ceph_get_snap_context( + ci->i_snap_realm->cached_context); + } ci->i_wr_ref++; + } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) ihold(&ci->vfs_inode); @@ -2100,16 +2313,19 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got) * requested from the MDS. */ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, - loff_t endoff, int *got, int *check_max, int *err) + loff_t endoff, bool nonblock, int *got, int *err) { struct inode *inode = &ci->vfs_inode; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; int ret = 0; int have, implemented; int file_wanted; + bool snap_rwsem_locked = false; dout("get_cap_refs %p need %s want %s\n", inode, ceph_cap_string(need), ceph_cap_string(want)); +again: spin_lock(&ci->i_ceph_lock); /* make sure file is actually open */ @@ -2125,6 +2341,10 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, /* finish pending truncate */ while (ci->i_truncate_pending) { spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) { + up_read(&mdsc->snap_rwsem); + snap_rwsem_locked = false; + } __ceph_do_pending_vmtruncate(inode); spin_lock(&ci->i_ceph_lock); } @@ -2136,7 +2356,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, dout("get_cap_refs %p endoff %llu > maxsize %llu\n", inode, endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) { - *check_max = 1; + *err = -EAGAIN; ret = 1; } goto out_unlock; @@ -2164,8 +2384,29 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, inode, ceph_cap_string(have), ceph_cap_string(not), ceph_cap_string(revoking)); if ((revoking & not) == 0) { + if (!snap_rwsem_locked && + !ci->i_head_snapc && + (need & CEPH_CAP_FILE_WR)) { + if (!down_read_trylock(&mdsc->snap_rwsem)) { + /* + * we can not call down_read() when + * task isn't in TASK_RUNNING state + */ + if (nonblock) { + *err = -EAGAIN; + ret = 1; + goto out_unlock; + } + + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + snap_rwsem_locked = true; + goto again; + } + snap_rwsem_locked = true; + } *got = need | (have & want); - __take_cap_refs(ci, *got); + __take_cap_refs(ci, *got, true); ret = 1; } } else { @@ -2189,6 +2430,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, } out_unlock: spin_unlock(&ci->i_ceph_lock); + if (snap_rwsem_locked) + up_read(&mdsc->snap_rwsem); dout("get_cap_refs %p ret %d got %s\n", inode, ret, ceph_cap_string(*got)); @@ -2231,50 +2474,70 @@ static void check_max_size(struct inode *inode, loff_t endoff) int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, loff_t endoff, int *got, struct page **pinned_page) { - int _got, check_max, ret, err = 0; + int _got, ret, err = 0; -retry: - if (endoff > 0) - check_max_size(&ci->vfs_inode, endoff); - _got = 0; - check_max = 0; - ret = wait_event_interruptible(ci->i_cap_wq, - try_get_cap_refs(ci, need, want, endoff, - &_got, &check_max, &err)); - if (err) - ret = err; + ret = ceph_pool_perm_check(ci, need); if (ret < 0) return ret; - if (check_max) - goto retry; + while (true) { + if (endoff > 0) + check_max_size(&ci->vfs_inode, endoff); - if (ci->i_inline_version != CEPH_INLINE_NONE && - (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && - i_size_read(&ci->vfs_inode) > 0) { - struct page *page = find_get_page(ci->vfs_inode.i_mapping, 0); - if (page) { - if (PageUptodate(page)) { - *pinned_page = page; - goto out; - } - page_cache_release(page); - } - /* - * drop cap refs first because getattr while holding - * caps refs can cause deadlock. - */ - ceph_put_cap_refs(ci, _got); + err = 0; _got = 0; + ret = try_get_cap_refs(ci, need, want, endoff, + false, &_got, &err); + if (ret) { + if (err == -EAGAIN) + continue; + if (err < 0) + return err; + } else { + ret = wait_event_interruptible(ci->i_cap_wq, + try_get_cap_refs(ci, need, want, endoff, + true, &_got, &err)); + if (err == -EAGAIN) + continue; + if (err < 0) + ret = err; + if (ret < 0) + return ret; + } - /* getattr request will bring inline data into page cache */ - ret = __ceph_do_getattr(&ci->vfs_inode, NULL, - CEPH_STAT_CAP_INLINE_DATA, true); - if (ret < 0) - return ret; - goto retry; + if (ci->i_inline_version != CEPH_INLINE_NONE && + (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && + i_size_read(&ci->vfs_inode) > 0) { + struct page *page = + find_get_page(ci->vfs_inode.i_mapping, 0); + if (page) { + if (PageUptodate(page)) { + *pinned_page = page; + break; + } + page_cache_release(page); + } + /* + * drop cap refs first because getattr while + * holding * caps refs can cause deadlock. + */ + ceph_put_cap_refs(ci, _got); + _got = 0; + + /* + * getattr request will bring inline data into + * page cache + */ + ret = __ceph_do_getattr(&ci->vfs_inode, NULL, + CEPH_STAT_CAP_INLINE_DATA, + true); + if (ret < 0) + return ret; + continue; + } + break; } -out: + *got = _got; return 0; } @@ -2286,10 +2549,31 @@ out: void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) { spin_lock(&ci->i_ceph_lock); - __take_cap_refs(ci, caps); + __take_cap_refs(ci, caps, false); spin_unlock(&ci->i_ceph_lock); } + +/* + * drop cap_snap that is not associated with any snapshot. + * we don't need to send FLUSHSNAP message for it. + */ +static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap) +{ + if (!capsnap->need_flush && + !capsnap->writing && !capsnap->dirty_pages) { + + dout("dropping cap_snap %p follows %llu\n", + capsnap, capsnap->follows); + ceph_put_snap_context(capsnap->context); + list_del(&capsnap->ci_item); + list_del(&capsnap->flushing_item); + ceph_put_cap_snap(capsnap); + return 1; + } + return 0; +} + /* * Release cap refs. * @@ -2303,7 +2587,6 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) { struct inode *inode = &ci->vfs_inode; int last = 0, put = 0, flushsnaps = 0, wake = 0; - struct ceph_cap_snap *capsnap; spin_lock(&ci->i_ceph_lock); if (had & CEPH_CAP_PIN) @@ -2325,17 +2608,24 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) if (had & CEPH_CAP_FILE_WR) if (--ci->i_wr_ref == 0) { last++; - if (!list_empty(&ci->i_cap_snaps)) { - capsnap = list_first_entry(&ci->i_cap_snaps, - struct ceph_cap_snap, - ci_item); - if (capsnap->writing) { - capsnap->writing = 0; - flushsnaps = - __ceph_finish_cap_snap(ci, - capsnap); - wake = 1; - } + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + capsnap->writing = 0; + if (ceph_try_drop_cap_snap(capsnap)) + put++; + else if (__ceph_finish_cap_snap(ci, capsnap)) + flushsnaps = 1; + wake = 1; + } + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + BUG_ON(!ci->i_head_snapc); + ceph_put_snap_context(ci->i_head_snapc); + ci->i_head_snapc = NULL; } /* see comment in __ceph_remove_cap() */ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) @@ -2352,7 +2642,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) ceph_flush_snaps(ci); if (wake) wake_up_all(&ci->i_cap_wq); - if (put) + while (put-- > 0) iput(inode); } @@ -2380,7 +2670,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, if (ci->i_head_snapc == snapc) { ci->i_wrbuffer_ref_head -= nr; if (ci->i_wrbuffer_ref_head == 0 && - ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; @@ -2401,25 +2693,15 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, capsnap->dirty_pages -= nr; if (capsnap->dirty_pages == 0) { complete_capsnap = 1; - if (capsnap->dirty == 0) - /* cap writeback completed before we created - * the cap_snap; no FLUSHSNAP is needed */ - drop_capsnap = 1; + drop_capsnap = ceph_try_drop_cap_snap(capsnap); } dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s%s\n", + " snap %lld %d/%d -> %d/%d %s%s\n", inode, capsnap, capsnap->context->seq, ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, ci->i_wrbuffer_ref, capsnap->dirty_pages, last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : "", - drop_capsnap ? " (drop capsnap)" : ""); - if (drop_capsnap) { - ceph_put_snap_context(capsnap->context); - list_del(&capsnap->ci_item); - list_del(&capsnap->flushing_item); - ceph_put_cap_snap(capsnap); - } + complete_capsnap ? " (complete capsnap)" : ""); } spin_unlock(&ci->i_ceph_lock); @@ -2526,7 +2808,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, * try to invalidate (once). (If there are dirty buffers, we * will invalidate _after_ writeback.) */ - if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && + if (!S_ISDIR(inode->i_mode) && /* don't invalidate readdir cache */ + ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && !ci->i_wrbuffer_ref) { if (try_nonblocking_invalidate(inode)) { @@ -2732,16 +3015,29 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_cap_flush *cf; + struct rb_node *n; + LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); int dirty = le32_to_cpu(m->dirty); int cleaned = 0; int drop = 0; - int i; - for (i = 0; i < CEPH_CAP_BITS; i++) - if ((dirty & (1 << i)) && - (u16)flush_tid == ci->i_cap_flush_tid[i]) - cleaned |= 1 << i; + n = rb_first(&ci->i_cap_flush_tree); + while (n) { + cf = rb_entry(n, struct ceph_cap_flush, i_node); + n = rb_next(&cf->i_node); + if (cf->tid == flush_tid) + cleaned = cf->caps; + if (cf->tid <= flush_tid) { + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add_tail(&cf->list, &to_remove); + } else { + cleaned &= ~cf->caps; + if (!cleaned) + break; + } + } dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," " flushing %s -> %s\n", @@ -2749,12 +3045,23 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(ci->i_flushing_caps & ~cleaned)); - if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned)) + if (list_empty(&to_remove) && !cleaned) goto out; ci->i_flushing_caps &= ~cleaned; spin_lock(&mdsc->cap_dirty_lock); + + if (!list_empty(&to_remove)) { + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (!cf || cf->tid > flush_tid) + wake_up_all(&mdsc->cap_flushing_wq); + } + if (ci->i_flushing_caps == 0) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) @@ -2764,14 +3071,14 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, struct ceph_inode_info, i_flushing_item)->vfs_inode); mdsc->num_cap_flushing--; - wake_up_all(&mdsc->cap_flushing_wq); dout(" inode %p now !flushing\n", inode); if (ci->i_dirty_caps == 0) { dout(" inode %p now clean\n", inode); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = 1; - if (ci->i_wrbuffer_ref_head == 0) { + if (ci->i_wr_ref == 0 && + ci->i_wrbuffer_ref_head == 0) { BUG_ON(!ci->i_head_snapc); ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; @@ -2785,6 +3092,13 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, out: spin_unlock(&ci->i_ceph_lock); + + while (!list_empty(&to_remove)) { + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + ceph_free_cap_flush(cf); + } if (drop) iput(inode); } @@ -2800,6 +3114,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap; int drop = 0; @@ -2823,6 +3138,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, list_del(&capsnap->ci_item); list_del(&capsnap->flushing_item); ceph_put_cap_snap(capsnap); + wake_up_all(&mdsc->cap_flushing_wq); drop = 1; break; } else { @@ -2971,7 +3287,6 @@ retry: mutex_lock_nested(&session->s_mutex, SINGLE_DEPTH_NESTING); } - ceph_add_cap_releases(mdsc, tsession); new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); @@ -3167,16 +3482,20 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); - if (op == CEPH_CAP_OP_IMPORT) - ceph_add_cap_releases(mdsc, session); - if (!inode) { dout(" i don't have ino %llx\n", vino.ino); if (op == CEPH_CAP_OP_IMPORT) { + cap = ceph_get_cap(mdsc, NULL); + cap->cap_ino = vino.ino; + cap->queue_release = 1; + cap->cap_id = cap_id; + cap->mseq = mseq; + cap->seq = seq; spin_lock(&session->s_cap_lock); - __queue_cap_release(session, vino.ino, cap_id, - mseq, seq); + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; spin_unlock(&session->s_cap_lock); } goto flush_cap_releases; @@ -3252,11 +3571,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, flush_cap_releases: /* - * send any full release message to try to move things + * send any cap release message to try to move things * along for the mds (who clearly thinks we still have this * cap). */ - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); done: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4248307fea90..9314b4ea2375 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -38,7 +38,7 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); + di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -107,6 +107,27 @@ static int fpos_cmp(loff_t l, loff_t r) } /* + * make note of the last dentry we read, so we can + * continue at the same lexicographical point, + * regardless of what dir changes take place on the + * server. + */ +static int note_last_dentry(struct ceph_file_info *fi, const char *name, + int len, unsigned next_offset) +{ + char *buf = kmalloc(len+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + kfree(fi->last_name); + fi->last_name = buf; + memcpy(fi->last_name, name, len); + fi->last_name[len] = 0; + fi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", fi->last_name); + return 0; +} + +/* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on * d_child when we initially get results back from the MDS, and @@ -123,123 +144,113 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_file_info *fi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); - struct list_head *p; - struct dentry *dentry, *last; + struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *); int err = 0; + loff_t ptr_pos = 0; + struct ceph_readdir_cache_control cache_ctl = {}; - /* claim ref on last dentry we returned */ - last = fi->dentry; - fi->dentry = NULL; - - dout("__dcache_readdir %p v%u at %llu (last %p)\n", - dir, shared_gen, ctx->pos, last); + dout("__dcache_readdir %p v%u at %llu\n", dir, shared_gen, ctx->pos); - spin_lock(&parent->d_lock); - - /* start at beginning? */ - if (ctx->pos == 2 || last == NULL || - fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { - if (list_empty(&parent->d_subdirs)) - goto out_unlock; - p = parent->d_subdirs.prev; - dout(" initial p %p/%p\n", p->prev, p->next); - } else { - p = last->d_child.prev; + /* we can calculate cache index for the first dirfrag */ + if (ceph_frag_is_leftmost(fpos_frag(ctx->pos))) { + cache_ctl.index = fpos_off(ctx->pos) - 2; + BUG_ON(cache_ctl.index < 0); + ptr_pos = cache_ctl.index * sizeof(struct dentry *); } -more: - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - while (1) { - dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, - d_unhashed(dentry) ? "!hashed" : "hashed", - parent->d_subdirs.prev, parent->d_subdirs.next); - if (p == &parent->d_subdirs) { + while (true) { + pgoff_t pgoff; + bool emit_dentry; + + if (ptr_pos >= i_size_read(dir)) { fi->flags |= CEPH_F_ATEND; - goto out_unlock; + err = 0; + break; + } + + err = -EAGAIN; + pgoff = ptr_pos >> PAGE_CACHE_SHIFT; + if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) { + ceph_readdir_cache_release(&cache_ctl); + cache_ctl.page = find_lock_page(&dir->i_data, pgoff); + if (!cache_ctl.page) { + dout(" page %lu not found\n", pgoff); + break; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(cache_ctl.page); + cache_ctl.dentries = kmap(cache_ctl.page); } - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + rcu_read_lock(); + spin_lock(&parent->d_lock); + /* check i_size again here, because empty directory can be + * marked as complete while not holding the i_mutex. */ + if (ceph_dir_is_complete_ordered(dir) && + ptr_pos < i_size_read(dir)) + dentry = cache_ctl.dentries[cache_ctl.index % nsize]; + else + dentry = NULL; + spin_unlock(&parent->d_lock); + if (dentry && !lockref_get_not_dead(&dentry->d_lockref)) + dentry = NULL; + rcu_read_unlock(); + if (!dentry) + break; + + emit_dentry = false; + di = ceph_dentry(dentry); + spin_lock(&dentry->d_lock); if (di->lease_shared_gen == shared_gen && - !d_unhashed(dentry) && d_really_is_positive(dentry) && + d_really_is_positive(dentry) && ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR && ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH && - fpos_cmp(ctx->pos, di->offset) <= 0) - break; - dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry, - dentry, di->offset, - ctx->pos, d_unhashed(dentry) ? " unhashed" : "", - !d_inode(dentry) ? " null" : ""); + fpos_cmp(ctx->pos, di->offset) <= 0) { + emit_dentry = true; + } spin_unlock(&dentry->d_lock); - p = p->prev; - dentry = list_entry(p, struct dentry, d_child); - di = ceph_dentry(dentry); - } - - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); - /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete_ordered(dir)) { - dout(" lost dir complete on %p; falling back to mds\n", dir); - dput(dentry); - err = -EAGAIN; - goto out; - } + if (emit_dentry) { + dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, + dentry, dentry, d_inode(dentry)); + ctx->pos = di->offset; + if (!dir_emit(ctx, dentry->d_name.name, + dentry->d_name.len, + ceph_translate_ino(dentry->d_sb, + d_inode(dentry)->i_ino), + d_inode(dentry)->i_mode >> 12)) { + dput(dentry); + err = 0; + break; + } + ctx->pos++; - dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos, - dentry, dentry, d_inode(dentry)); - if (!dir_emit(ctx, dentry->d_name.name, - dentry->d_name.len, - ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino), - d_inode(dentry)->i_mode >> 12)) { - if (last) { - /* remember our position */ - fi->dentry = last; - fi->next_offset = fpos_off(di->offset); + if (last) + dput(last); + last = dentry; + } else { + dput(dentry); } - dput(dentry); - return 0; - } - - ctx->pos = di->offset + 1; - if (last) - dput(last); - last = dentry; - - spin_lock(&parent->d_lock); - p = p->prev; /* advance to next dentry */ - goto more; - -out_unlock: - spin_unlock(&parent->d_lock); -out: - if (last) + cache_ctl.index++; + ptr_pos += sizeof(struct dentry *); + } + ceph_readdir_cache_release(&cache_ctl); + if (last) { + int ret; + di = ceph_dentry(last); + ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + fpos_off(di->offset) + 1); + if (ret < 0) + err = ret; dput(last); + } return err; } -/* - * make note of the last dentry we read, so we can - * continue at the same lexicographical point, - * regardless of what dir changes take place on the - * server. - */ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, - int len) -{ - kfree(fi->last_name); - fi->last_name = kmalloc(len+1, GFP_NOFS); - if (!fi->last_name) - return -ENOMEM; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - dout("note_last_dentry '%s'\n", fi->last_name); - return 0; -} - static int ceph_readdir(struct file *file, struct dir_context *ctx) { struct ceph_file_info *fi = file->private_data; @@ -280,8 +291,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* can we use the dcache? */ spin_lock(&ci->i_ceph_lock); - if ((ctx->pos == 2 || fi->dentry) && - ceph_test_mount_opt(fsc, DCACHE) && + if (ceph_test_mount_opt(fsc, DCACHE) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && __ceph_dir_is_complete_ordered(ci) && @@ -296,24 +306,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } else { spin_unlock(&ci->i_ceph_lock); } - if (fi->dentry) { - err = note_last_dentry(fi, fi->dentry->d_name.name, - fi->dentry->d_name.len); - if (err) - return err; - dput(fi->dentry); - fi->dentry = NULL; - } /* proceed with a normal readdir */ - - if (ctx->pos == 2) { - /* note dir version at start of readdir so we can tell - * if any dentries get dropped */ - fi->dir_release_count = atomic_read(&ci->i_release_count); - fi->dir_ordered_count = ci->i_ordered_count; - } - more: /* do we have the correct frag content buffered? */ if (fi->frag != frag || fi->last_readdir == NULL) { @@ -342,12 +336,15 @@ more: req->r_direct_hash = ceph_frag_value(frag); req->r_direct_is_hash = true; if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); + req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; } } + req->r_dir_release_cnt = fi->dir_release_count; + req->r_dir_ordered_cnt = fi->dir_ordered_count; + req->r_readdir_cache_idx = fi->readdir_cache_idx; req->r_readdir_offset = fi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); @@ -364,26 +361,38 @@ more: (int)req->r_reply_info.dir_end, (int)req->r_reply_info.dir_complete); - if (!req->r_did_prepopulate) { - dout("readdir !did_prepopulate"); - /* preclude from marking dir complete */ - fi->dir_release_count--; - } /* note next offset and last dentry name */ rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); - if (ceph_frag_is_leftmost(frag)) - fi->next_offset = 2; - else - fi->next_offset = 0; - off = fi->next_offset; + off = req->r_readdir_offset; + fi->next_offset = off; } + fi->frag = frag; fi->offset = fi->next_offset; fi->last_readdir = req; + if (req->r_did_prepopulate) { + fi->readdir_cache_idx = req->r_readdir_cache_idx; + if (fi->readdir_cache_idx < 0) { + /* preclude from marking dir ordered */ + fi->dir_ordered_count = 0; + } else if (ceph_frag_is_leftmost(frag) && off == 2) { + /* note dir version at start of readdir so + * we can tell if any dentries get dropped */ + fi->dir_release_count = req->r_dir_release_cnt; + fi->dir_ordered_count = req->r_dir_ordered_cnt; + } + } else { + dout("readdir !did_prepopulate"); + /* disable readdir cache */ + fi->readdir_cache_idx = -1; + /* preclude from marking dir complete */ + fi->dir_release_count = 0; + } + if (req->r_reply_info.dir_end) { kfree(fi->last_name); fi->last_name = NULL; @@ -394,10 +403,10 @@ more: } else { err = note_last_dentry(fi, rinfo->dir_dname[rinfo->dir_nr-1], - rinfo->dir_dname_len[rinfo->dir_nr-1]); + rinfo->dir_dname_len[rinfo->dir_nr-1], + fi->next_offset + rinfo->dir_nr); if (err) return err; - fi->next_offset += rinfo->dir_nr; } } @@ -453,16 +462,22 @@ more: * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - spin_lock(&ci->i_ceph_lock); - if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { - if (ci->i_ordered_count == fi->dir_ordered_count) + if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + spin_lock(&ci->i_ceph_lock); + if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); - else + /* use i_size to track number of entries in + * readdir cache */ + BUG_ON(fi->readdir_cache_idx < 0); + i_size_write(inode, fi->readdir_cache_idx * + sizeof(struct dentry*)); + } else { dout(" marking %p complete\n", inode); + } __ceph_dir_set_complete(ci, fi->dir_release_count, fi->dir_ordered_count); + spin_unlock(&ci->i_ceph_lock); } - spin_unlock(&ci->i_ceph_lock); dout("readdir %p file %p done.\n", inode, file); return 0; @@ -476,14 +491,12 @@ static void reset_readdir(struct ceph_file_info *fi, unsigned frag) } kfree(fi->last_name); fi->last_name = NULL; + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; if (ceph_frag_is_leftmost(frag)) fi->next_offset = 2; /* compensate for . and .. */ else fi->next_offset = 0; - if (fi->dentry) { - dput(fi->dentry); - fi->dentry = NULL; - } fi->flags &= ~CEPH_F_ATEND; } @@ -497,13 +510,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) mutex_lock(&inode->i_mutex); retval = -EINVAL; switch (whence) { - case SEEK_END: - offset += inode->i_size + 2; /* FIXME */ - break; case SEEK_CUR: offset += file->f_pos; case SEEK_SET: break; + case SEEK_END: + retval = -EOPNOTSUPP; default: goto out; } @@ -516,20 +528,18 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } retval = offset; - /* - * discard buffered readdir content on seekdir(0), or - * seek to new frag, or seek prior to current chunk. - */ if (offset == 0 || fpos_frag(offset) != fi->frag || fpos_off(offset) < fi->offset) { + /* discard buffered readdir content on seekdir(0), or + * seek to new frag, or seek prior to current chunk */ dout("dir_llseek dropping %p content\n", file); reset_readdir(fi, fpos_frag(offset)); + } else if (fpos_cmp(offset, old_offset) > 0) { + /* reset dir_release_count if we did a forward seek */ + fi->dir_release_count = 0; + fi->readdir_cache_idx = -1; } - - /* bump dir_release_count if we did a forward seek */ - if (fpos_cmp(offset, old_offset) > 0) - fi->dir_release_count--; } out: mutex_unlock(&inode->i_mutex); @@ -764,7 +774,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, err = PTR_ERR(req); goto out; } - req->r_path2 = kstrdup(dest, GFP_NOFS); + req->r_path2 = kstrdup(dest, GFP_KERNEL); if (!req->r_path2) { err = -ENOMEM; ceph_mdsc_put_request(req); @@ -985,16 +995,15 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * to do it here. */ + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(old_dir); + ceph_dir_clear_complete(new_dir); + d_move(old_dentry, new_dentry); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(new_dentry); - - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_complete(old_dir); - ceph_dir_clear_complete(new_dir); - } ceph_mdsc_put_request(req); return err; @@ -1189,7 +1198,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, return -EISDIR; if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_NOFS); + cf->dir_info = kmalloc(bufsize, GFP_KERNEL); if (!cf->dir_info) return -ENOMEM; cf->dir_info_len = @@ -1224,66 +1233,6 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, } /* - * an fsync() on a dir will wait for any uncommitted directory - * operations to commit. - */ -static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, - int datasync) -{ - struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_dirops; - struct ceph_mds_request *req; - u64 last_tid; - int ret = 0; - - dout("dir_fsync %p\n", inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; - mutex_lock(&inode->i_mutex); - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - req = list_entry(head->prev, - struct ceph_mds_request, r_unsafe_dir_item); - last_tid = req->r_tid; - - do { - ceph_mdsc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("dir_fsync %p wait on tid %llu (until %llu)\n", - inode, req->r_tid, last_tid); - if (req->r_timeout) { - unsigned long time_left = wait_for_completion_timeout( - &req->r_safe_completion, - req->r_timeout); - if (time_left > 0) - ret = 0; - else - ret = -EIO; /* timed out */ - } else { - wait_for_completion(&req->r_safe_completion); - } - ceph_mdsc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - if (ret || list_empty(head)) - break; - req = list_entry(head->next, - struct ceph_mds_request, r_unsafe_dir_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); - mutex_unlock(&inode->i_mutex); - - return ret; -} - -/* * We maintain a private dentry LRU. * * FIXME: this needs to be changed to a per-mds lru to be useful. @@ -1353,7 +1302,7 @@ const struct file_operations ceph_dir_fops = { .open = ceph_open, .release = ceph_release, .unlocked_ioctl = ceph_ioctl, - .fsync = ceph_dir_fsync, + .fsync = ceph_fsync, }; const struct file_operations ceph_snapdir_fops = { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b6b522b4b31..8b79d87eaf46 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -89,13 +89,14 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) case S_IFDIR: dout("init_file %p %p 0%o (regular)\n", inode, file, inode->i_mode); - cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO); + cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO); if (cf == NULL) { ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ return -ENOMEM; } cf->fmode = fmode; cf->next_offset = 2; + cf->readdir_cache_idx = -1; file->private_data = cf; BUG_ON(inode->i_fop->release != ceph_release); break; @@ -324,7 +325,6 @@ int ceph_release(struct inode *inode, struct file *file) ceph_mdsc_put_request(cf->last_readdir); kfree(cf->last_name); kfree(cf->dir_info); - dput(cf->dentry); kmem_cache_free(ceph_file_cachep, cf); /* wake up anyone waiting for caps on this inode */ @@ -483,7 +483,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, } } else { num_pages = calc_pages_for(off, len); - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); ret = striped_read(inode, off, len, pages, @@ -557,13 +557,13 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -600,7 +600,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t start; ssize_t n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, @@ -614,7 +613,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) break; } - osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); n = iov_iter_get_pages_alloc(from, &pages, len, &start); if (unlikely(n < 0)) { @@ -674,13 +673,13 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) +ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, + struct ceph_snap_context *snapc) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_snap_context *snapc; struct ceph_vino vino; struct ceph_osd_request *req; struct page **pages; @@ -717,7 +716,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) size_t left; int n; - snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &len, 0, 1, @@ -736,7 +734,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) */ num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -860,7 +858,7 @@ again: struct page *page = NULL; loff_t i_size; if (retry_op == READ_INLINE) { - page = __page_cache_alloc(GFP_NOFS); + page = __page_cache_alloc(GFP_KERNEL); if (!page) return -ENOMEM; } @@ -941,6 +939,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; @@ -948,6 +947,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -959,7 +962,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) pos = iocb->ki_pos; count = iov_iter_count(from); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; @@ -996,14 +999,30 @@ retry_snap: if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + struct ceph_snap_context *snapc; struct iov_iter data; mutex_unlock(&inode->i_mutex); + + spin_lock(&ci->i_ceph_lock); + if (__ceph_have_pending_cap_snap(ci)) { + struct ceph_cap_snap *capsnap = + list_last_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, + ci_item); + snapc = ceph_get_snap_context(capsnap->context); + } else { + BUG_ON(!ci->i_head_snapc); + snapc = ceph_get_snap_context(ci->i_head_snapc); + } + spin_unlock(&ci->i_ceph_lock); + /* we might need to revert back to that point */ data = *from; if (iocb->ki_flags & IOCB_DIRECT) - written = ceph_sync_direct_write(iocb, &data, pos); + written = ceph_sync_direct_write(iocb, &data, pos, + snapc); else - written = ceph_sync_write(iocb, &data, pos); + written = ceph_sync_write(iocb, &data, pos, snapc); if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", @@ -1014,6 +1033,7 @@ retry_snap: } if (written > 0) iov_iter_advance(from, written); + ceph_put_snap_context(snapc); } else { loff_t old_size = inode->i_size; /* @@ -1035,7 +1055,8 @@ retry_snap: int dirty; spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1059,6 +1080,7 @@ retry_snap: out: mutex_unlock(&inode->i_mutex); out_unlocked: + ceph_free_cap_flush(prealloc_cf); current->backing_dev_info = NULL; return written ? written : err; } @@ -1255,6 +1277,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; + struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; int ret = 0; @@ -1267,6 +1290,10 @@ static long ceph_fallocate(struct file *file, int mode, if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + mutex_lock(&inode->i_mutex); if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -1313,7 +1340,8 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) { spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, + &prealloc_cf); spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); @@ -1322,6 +1350,7 @@ static long ceph_fallocate(struct file *file, int mode, ceph_put_cap_refs(ci, got); unlock: mutex_unlock(&inode->i_mutex); + ceph_free_cap_flush(prealloc_cf); return ret; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 571acd88606c..96d2bd829902 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -389,9 +389,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_inline_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; - ci->i_ordered_count = 0; - atomic_set(&ci->i_release_count, 1); - atomic_set(&ci->i_complete_count, 0); + atomic64_set(&ci->i_ordered_count, 1); + atomic64_set(&ci->i_release_count, 1); + atomic64_set(&ci->i_complete_seq[0], 0); + atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); @@ -415,9 +416,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_flushing_caps = 0; INIT_LIST_HEAD(&ci->i_dirty_item); INIT_LIST_HEAD(&ci->i_flushing_item); - ci->i_cap_flush_seq = 0; - ci->i_cap_flush_last_tid = 0; - memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid)); + ci->i_prealloc_cap_flush = NULL; + ci->i_cap_flush_tree = RB_ROOT; init_waitqueue_head(&ci->i_cap_wq); ci->i_hold_caps_min = 0; ci->i_hold_caps_max = 0; @@ -752,7 +752,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) + ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, le32_to_cpu(info->truncate_seq), le64_to_cpu(info->truncate_size), @@ -858,9 +861,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); + i_size_write(inode, 0); __ceph_dir_set_complete(ci, - atomic_read(&ci->i_release_count), - ci->i_ordered_count); + atomic64_read(&ci->i_release_count), + atomic64_read(&ci->i_ordered_count)); } wake = true; @@ -1212,6 +1216,10 @@ retry_lookup: dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(olddir); + d_move(req->r_old_dentry, dn); dout(" src %p '%pd' dst %p '%pd'\n", req->r_old_dentry, @@ -1222,10 +1230,6 @@ retry_lookup: rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* d_move screws up sibling dentries' offsets */ - ceph_dir_clear_ordered(dir); - ceph_dir_clear_ordered(olddir); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1333,6 +1337,49 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, return err; } +void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) +{ + if (ctl->page) { + kunmap(ctl->page); + page_cache_release(ctl->page); + ctl->page = NULL; + } +} + +static int fill_readdir_cache(struct inode *dir, struct dentry *dn, + struct ceph_readdir_cache_control *ctl, + struct ceph_mds_request *req) +{ + struct ceph_inode_info *ci = ceph_inode(dir); + unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*); + unsigned idx = ctl->index % nsize; + pgoff_t pgoff = ctl->index / nsize; + + if (!ctl->page || pgoff != page_index(ctl->page)) { + ceph_readdir_cache_release(ctl); + ctl->page = grab_cache_page(&dir->i_data, pgoff); + if (!ctl->page) { + ctl->index = -1; + return -ENOMEM; + } + /* reading/filling the cache are serialized by + * i_mutex, no need to use page lock */ + unlock_page(ctl->page); + ctl->dentries = kmap(ctl->page); + } + + if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && + req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { + dout("readdir cache dn %p idx %d\n", dn, ctl->index); + ctl->dentries[idx] = dn; + ctl->index++; + } else { + dout("disable readdir cache\n"); + ctl->index = -1; + } + return 0; +} + int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session) { @@ -1345,8 +1392,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; struct ceph_dentry_info *di; - u64 r_readdir_offset = req->r_readdir_offset; u32 frag = le32_to_cpu(rhead->args.readdir.frag); + struct ceph_readdir_cache_control cache_ctl = {}; + + if (req->r_aborted) + return readdir_prepopulate_inodes_only(req, session); if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -1354,14 +1404,11 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, frag, le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (ceph_frag_is_leftmost(frag)) - r_readdir_offset = 2; + req->r_readdir_offset = 2; else - r_readdir_offset = 0; + req->r_readdir_offset = 0; } - if (req->r_aborted) - return readdir_prepopulate_inodes_only(req, session); - if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { snapdir = ceph_get_snapdir(d_inode(parent)); parent = d_find_alias(snapdir); @@ -1374,6 +1421,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); } + if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) { + /* note dir version at start of readdir so we can tell + * if any dentries get dropped */ + struct ceph_inode_info *ci = ceph_inode(d_inode(parent)); + req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); + req->r_dir_ordered_cnt = atomic64_read(&ci->i_ordered_count); + req->r_readdir_cache_idx = 0; + } + + cache_ctl.index = req->r_readdir_cache_idx; + /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_vino vino; @@ -1413,13 +1471,6 @@ retry_lookup: d_delete(dn); dput(dn); goto retry_lookup; - } else { - /* reorder parent's d_subdirs */ - spin_lock(&parent->d_lock); - spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_child, &parent->d_subdirs); - spin_unlock(&dn->d_lock); - spin_unlock(&parent->d_lock); } /* inode */ @@ -1436,13 +1487,15 @@ retry_lookup: } } - if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, - req->r_request_started, -1, - &req->r_caps_reservation) < 0) { + ret = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session, + req->r_request_started, -1, + &req->r_caps_reservation); + if (ret < 0) { pr_err("fill_inode badness on %p\n", in); if (d_really_is_negative(dn)) iput(in); d_drop(dn); + err = ret; goto next_item; } @@ -1458,19 +1511,28 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, i + r_readdir_offset); + di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); update_dentry_lease(dn, rinfo->dir_dlease[i], req->r_session, req->r_request_started); + + if (err == 0 && cache_ctl.index >= 0) { + ret = fill_readdir_cache(d_inode(parent), dn, + &cache_ctl, req); + if (ret < 0) + err = ret; + } next_item: if (dn) dput(dn); } - if (err == 0) - req->r_did_prepopulate = true; - out: + if (err == 0) { + req->r_did_prepopulate = true; + req->r_readdir_cache_idx = cache_ctl.index; + } + ceph_readdir_cache_release(&cache_ctl); if (snapdir) { iput(snapdir); dput(parent); @@ -1712,11 +1774,13 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf; int issued; int release = 0, dirtied = 0; int mask = 0; int err = 0; int inode_dirty_flags = 0; + bool lock_snap_rwsem = false; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1725,13 +1789,31 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, USE_AUTH_MDS); - if (IS_ERR(req)) + if (IS_ERR(req)) { + ceph_free_cap_flush(prealloc_cf); return PTR_ERR(req); + } spin_lock(&ci->i_ceph_lock); issued = __ceph_caps_issued(ci, NULL); + + if (!ci->i_head_snapc && + (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + issued = __ceph_caps_issued(ci, NULL); + } + } + dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ia_valid & ATTR_UID) { @@ -1874,12 +1956,15 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) dout("setattr %p ATTR_FILE ... hrm!\n", inode); if (dirtied) { - inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied); + inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, + &prealloc_cf); inode->i_ctime = CURRENT_TIME; } release &= issued; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); @@ -1904,9 +1989,11 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) ceph_mdsc_put_request(req); if (mask & CEPH_SETATTR_SIZE) __ceph_do_pending_vmtruncate(inode); + ceph_free_cap_flush(prealloc_cf); return err; out_put: ceph_mdsc_put_request(req); + ceph_free_cap_flush(prealloc_cf); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84f37f34f9aa..6aa07af67603 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -8,6 +8,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/utsname.h> +#include <linux/ratelimit.h> #include "super.h" #include "mds_client.h" @@ -458,7 +459,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_cap_reconnect = 0; s->s_cap_iterator = NULL; INIT_LIST_HEAD(&s->s_cap_releases); - INIT_LIST_HEAD(&s->s_cap_releases_done); INIT_LIST_HEAD(&s->s_cap_flushing); INIT_LIST_HEAD(&s->s_cap_snaps_flushing); @@ -629,6 +629,9 @@ static void __register_request(struct ceph_mds_client *mdsc, req->r_uid = current_fsuid(); req->r_gid = current_fsgid(); + if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) + mdsc->oldest_tid = req->r_tid; + if (dir) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -644,6 +647,21 @@ static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + + if (req->r_tid == mdsc->oldest_tid) { + struct rb_node *p = rb_next(&req->r_node); + mdsc->oldest_tid = 0; + while (p) { + struct ceph_mds_request *next_req = + rb_entry(p, struct ceph_mds_request, r_node); + if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) { + mdsc->oldest_tid = next_req->r_tid; + break; + } + p = rb_next(p); + } + } + rb_erase(&req->r_node, &mdsc->request_tree); RB_CLEAR_NODE(&req->r_node); @@ -998,27 +1016,25 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* - * Free preallocated cap messages assigned to this session - */ -static void cleanup_cap_releases(struct ceph_mds_session *session) +/* caller holds s_cap_lock, we drop it */ +static void cleanup_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) + __releases(session->s_cap_lock) { - struct ceph_msg *msg; + LIST_HEAD(tmp_list); + list_splice_init(&session->s_cap_releases, &tmp_list); + session->s_num_cap_releases = 0; + spin_unlock(&session->s_cap_lock); - spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); - } - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - ceph_msg_put(msg); + dout("cleanup_cap_releases mds%d\n", session->s_mds); + while (!list_empty(&tmp_list)) { + struct ceph_cap *cap; + /* zero out the in-progress message */ + cap = list_first_entry(&tmp_list, + struct ceph_cap, session_caps); + list_del(&cap->session_caps); + ceph_put_cap(mdsc, cap); } - spin_unlock(&session->s_cap_lock); } static void cleanup_session_requests(struct ceph_mds_client *mdsc, @@ -1033,7 +1049,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); list_del_init(&req->r_unsafe_item); - pr_info(" dropping unsafe request %llu\n", req->r_tid); + pr_warn_ratelimited(" dropping unsafe request %llu\n", + req->r_tid); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1095,10 +1112,16 @@ static int iterate_session_caps(struct ceph_mds_session *session, dout("iterate_session_caps finishing cap %p removal\n", cap); BUG_ON(cap->session != session); + cap->session = NULL; list_del_init(&cap->session_caps); session->s_nr_caps--; - cap->session = NULL; - old_cap = cap; /* put_cap it w/o locks held */ + if (cap->queue_release) { + list_add_tail(&cap->session_caps, + &session->s_cap_releases); + session->s_num_cap_releases++; + } else { + old_cap = cap; /* put_cap it w/o locks held */ + } } if (ret < 0) goto out; @@ -1119,6 +1142,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + LIST_HEAD(to_remove); int drop = 0; dout("removing cap %p, ci is %p, inode is %p\n", @@ -1126,12 +1150,27 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&ci->i_ceph_lock); __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { + struct ceph_cap_flush *cf; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + while (true) { + struct rb_node *n = rb_first(&ci->i_cap_flush_tree); + if (!n) + break; + cf = rb_entry(n, struct ceph_cap_flush, i_node); + rb_erase(&cf->i_node, &ci->i_cap_flush_tree); + list_add(&cf->list, &to_remove); + } + spin_lock(&mdsc->cap_dirty_lock); + + list_for_each_entry(cf, &to_remove, list) + rb_erase(&cf->g_node, &mdsc->cap_flush_tree); + if (!list_empty(&ci->i_dirty_item)) { - pr_info(" dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty %s state for %p %lld\n", ceph_cap_string(ci->i_dirty_caps), inode, ceph_ino(inode)); ci->i_dirty_caps = 0; @@ -1139,7 +1178,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } if (!list_empty(&ci->i_flushing_item)) { - pr_info(" dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited( + " dropping dirty+flushing %s state for %p %lld\n", ceph_cap_string(ci->i_flushing_caps), inode, ceph_ino(inode)); ci->i_flushing_caps = 0; @@ -1148,8 +1188,20 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, drop = 1; } spin_unlock(&mdsc->cap_dirty_lock); + + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { + list_add(&ci->i_prealloc_cap_flush->list, &to_remove); + ci->i_prealloc_cap_flush = NULL; + } } spin_unlock(&ci->i_ceph_lock); + while (!list_empty(&to_remove)) { + struct ceph_cap_flush *cf; + cf = list_first_entry(&to_remove, + struct ceph_cap_flush, list); + list_del(&cf->list); + ceph_free_cap_flush(cf); + } while (drop--) iput(inode); return 0; @@ -1191,11 +1243,12 @@ static void remove_session_caps(struct ceph_mds_session *session) spin_lock(&session->s_cap_lock); } } - spin_unlock(&session->s_cap_lock); + + // drop cap expires and unlock s_cap_lock + cleanup_cap_releases(session->s_mdsc, session); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); - cleanup_cap_releases(session); } /* @@ -1371,7 +1424,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), ceph_cap_string(used), ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { - if (ci->i_dirty_caps | ci->i_flushing_caps) + if (ci->i_dirty_caps || ci->i_flushing_caps || + !list_empty(&ci->i_cap_snaps)) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; @@ -1417,121 +1471,80 @@ static int trim_caps(struct ceph_mds_client *mdsc, session->s_trim_caps = 0; } - ceph_add_cap_releases(mdsc, session); ceph_send_cap_releases(mdsc, session); return 0; } -/* - * Allocate cap_release messages. If there is a partially full message - * in the queue, try to allocate enough to cover it's remainder, so that - * we can send it immediately. - * - * Called under s_mutex. - */ -int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +static int check_capsnap_flush(struct ceph_inode_info *ci, + u64 want_snap_seq) { - struct ceph_msg *msg, *partial = NULL; - struct ceph_mds_cap_release *head; - int err = -ENOMEM; - int extra = mdsc->fsc->mount_options->cap_release_safety; - int num; - - dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, - extra); - - spin_lock(&session->s_cap_lock); - - if (!list_empty(&session->s_cap_releases)) { - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, - list_head); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - if (num) { - dout(" partial %p with (%d/%d)\n", msg, num, - (int)CEPH_CAPS_PER_RELEASE); - extra += CEPH_CAPS_PER_RELEASE - num; - partial = msg; - } - } - while (session->s_num_cap_releases < session->s_nr_caps + extra) { - spin_unlock(&session->s_cap_lock); - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, - GFP_NOFS, false); - if (!msg) - goto out_unlocked; - dout("add_cap_releases %p msg %p now %d\n", session, msg, - (int)msg->front.iov_len); - head = msg->front.iov_base; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - spin_lock(&session->s_cap_lock); - list_add(&msg->list_head, &session->s_cap_releases); - session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; - } - - if (partial) { - head = partial->front.iov_base; - num = le32_to_cpu(head->num); - dout(" queueing partial %p with %d/%d\n", partial, num, - (int)CEPH_CAPS_PER_RELEASE); - list_move_tail(&partial->list_head, - &session->s_cap_releases_done); - session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; + int ret = 1; + spin_lock(&ci->i_ceph_lock); + if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&ci->i_cap_snaps, + struct ceph_cap_snap, ci_item); + ret = capsnap->follows >= want_snap_seq; } - err = 0; - spin_unlock(&session->s_cap_lock); -out_unlocked: - return err; + spin_unlock(&ci->i_ceph_lock); + return ret; } -static int check_cap_flush(struct inode *inode, u64 want_flush_seq) +static int check_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_tid) { - struct ceph_inode_info *ci = ceph_inode(inode); - int ret; - spin_lock(&ci->i_ceph_lock); - if (ci->i_flushing_caps) - ret = ci->i_cap_flush_seq >= want_flush_seq; - else - ret = 1; - spin_unlock(&ci->i_ceph_lock); + struct rb_node *n; + struct ceph_cap_flush *cf; + int ret = 1; + + spin_lock(&mdsc->cap_dirty_lock); + n = rb_first(&mdsc->cap_flush_tree); + cf = n ? rb_entry(n, struct ceph_cap_flush, g_node) : NULL; + if (cf && cf->tid <= want_flush_tid) { + dout("check_caps_flush still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); + ret = 0; + } + spin_unlock(&mdsc->cap_dirty_lock); return ret; } /* * flush all dirty inode data to disk. * - * returns true if we've flushed through want_flush_seq + * returns true if we've flushed through want_flush_tid */ -static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) +static void wait_caps_flush(struct ceph_mds_client *mdsc, + u64 want_flush_tid, u64 want_snap_seq) { int mds; - dout("check_cap_flush want %lld\n", want_flush_seq); + dout("check_caps_flush want %llu snap want %llu\n", + want_flush_tid, want_snap_seq); mutex_lock(&mdsc->mutex); - for (mds = 0; mds < mdsc->max_sessions; mds++) { + for (mds = 0; mds < mdsc->max_sessions; ) { struct ceph_mds_session *session = mdsc->sessions[mds]; struct inode *inode = NULL; - if (!session) + if (!session) { + mds++; continue; + } get_session(session); mutex_unlock(&mdsc->mutex); mutex_lock(&session->s_mutex); - if (!list_empty(&session->s_cap_flushing)) { - struct ceph_inode_info *ci = - list_entry(session->s_cap_flushing.next, - struct ceph_inode_info, - i_flushing_item); - - if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) { - dout("check_cap_flush still flushing %p " - "seq %lld <= %lld to mds%d\n", - &ci->vfs_inode, ci->i_cap_flush_seq, - want_flush_seq, session->s_mds); + if (!list_empty(&session->s_cap_snaps_flushing)) { + struct ceph_cap_snap *capsnap = + list_first_entry(&session->s_cap_snaps_flushing, + struct ceph_cap_snap, + flushing_item); + struct ceph_inode_info *ci = capsnap->ci; + if (!check_capsnap_flush(ci, want_snap_seq)) { + dout("check_cap_flush still flushing snap %p " + "follows %lld <= %lld to mds%d\n", + &ci->vfs_inode, capsnap->follows, + want_snap_seq, mds); inode = igrab(&ci->vfs_inode); } } @@ -1540,15 +1553,21 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) if (inode) { wait_event(mdsc->cap_flushing_wq, - check_cap_flush(inode, want_flush_seq)); + check_capsnap_flush(ceph_inode(inode), + want_snap_seq)); iput(inode); + } else { + mds++; } mutex_lock(&mdsc->mutex); } - mutex_unlock(&mdsc->mutex); - dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq); + + wait_event(mdsc->cap_flushing_wq, + check_caps_flush(mdsc, want_flush_tid)); + + dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); } /* @@ -1557,60 +1576,74 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { - struct ceph_msg *msg; + struct ceph_msg *msg = NULL; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + struct ceph_cap *cap; + LIST_HEAD(tmp_list); + int num_cap_releases; - dout("send_cap_releases mds%d\n", session->s_mds); spin_lock(&session->s_cap_lock); - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); - spin_unlock(&session->s_cap_lock); - msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); - ceph_con_send(&session->s_con, msg); - spin_lock(&session->s_cap_lock); - } +again: + list_splice_init(&session->s_cap_releases, &tmp_list); + num_cap_releases = session->s_num_cap_releases; + session->s_num_cap_releases = 0; spin_unlock(&session->s_cap_lock); -} -static void discard_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) -{ - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - unsigned num; - - dout("discard_cap_releases mds%d\n", session->s_mds); + while (!list_empty(&tmp_list)) { + if (!msg) { + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, + PAGE_CACHE_SIZE, GFP_NOFS, false); + if (!msg) + goto out_err; + head = msg->front.iov_base; + head->num = cpu_to_le32(0); + msg->front.iov_len = sizeof(*head); + } + cap = list_first_entry(&tmp_list, struct ceph_cap, + session_caps); + list_del(&cap->session_caps); + num_cap_releases--; - if (!list_empty(&session->s_cap_releases)) { - /* zero out the in-progress message */ - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", - session->s_mds, msg, num); - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - session->s_num_cap_releases += num; + le32_add_cpu(&head->num, 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(cap->cap_ino); + item->cap_id = cpu_to_le64(cap->cap_id); + item->migrate_seq = cpu_to_le32(cap->mseq); + item->seq = cpu_to_le32(cap->issue_seq); + msg->front.iov_len += sizeof(*item); + + ceph_put_cap(mdsc, cap); + + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + ceph_con_send(&session->s_con, msg); + msg = NULL; + } } - /* requeue completed messages */ - while (!list_empty(&session->s_cap_releases_done)) { - msg = list_first_entry(&session->s_cap_releases_done, - struct ceph_msg, list_head); - list_del_init(&msg->list_head); + BUG_ON(num_cap_releases != 0); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, - num); - session->s_num_cap_releases += num; - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - list_add(&msg->list_head, &session->s_cap_releases); + spin_lock(&session->s_cap_lock); + if (!list_empty(&session->s_cap_releases)) + goto again; + spin_unlock(&session->s_cap_lock); + + if (msg) { + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + ceph_con_send(&session->s_con, msg); } + return; +out_err: + pr_err("send_cap_releases mds%d, failed to allocate message\n", + session->s_mds); + spin_lock(&session->s_cap_lock); + list_splice(&tmp_list, &session->s_cap_releases); + session->s_num_cap_releases += num_cap_releases; + spin_unlock(&session->s_cap_lock); } /* @@ -1635,7 +1668,8 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, order = get_order(size * num_entries); while (order >= 0) { - rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, + rinfo->dir_in = (void*)__get_free_pages(GFP_KERNEL | + __GFP_NOWARN, order); if (rinfo->dir_in) break; @@ -1697,13 +1731,9 @@ static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) struct ceph_mds_request, r_node); } -static u64 __get_oldest_tid(struct ceph_mds_client *mdsc) +static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) { - struct ceph_mds_request *req = __get_oldest_req(mdsc); - - if (req) - return req->r_tid; - return 0; + return mdsc->oldest_tid; } /* @@ -2267,15 +2297,18 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, /* wait */ mutex_unlock(&mdsc->mutex); dout("do_request waiting\n"); - if (req->r_timeout) { - err = (long)wait_for_completion_killable_timeout( - &req->r_completion, req->r_timeout); - if (err == 0) - err = -EIO; - } else if (req->r_wait_for_completion) { + if (!req->r_timeout && req->r_wait_for_completion) { err = req->r_wait_for_completion(mdsc, req); } else { - err = wait_for_completion_killable(&req->r_completion); + long timeleft = wait_for_completion_killable_timeout( + &req->r_completion, + ceph_timeout_jiffies(req->r_timeout)); + if (timeleft > 0) + err = 0; + else if (!timeleft) + err = -EIO; /* timed out */ + else + err = timeleft; /* killed */ } dout("do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); @@ -2496,7 +2529,6 @@ out_err: } mutex_unlock(&mdsc->mutex); - ceph_add_cap_releases(mdsc, req->r_session); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2888,8 +2920,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - discard_cap_releases(mdsc, session); - spin_unlock(&session->s_cap_lock); + cleanup_cap_releases(mdsc, session); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) @@ -2956,6 +2987,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, reply->hdr.data_len = cpu_to_le32(pagelist->length); ceph_msg_data_add_pagelist(reply, pagelist); + + ceph_early_kick_flushing_caps(mdsc, session); + ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); @@ -3352,7 +3386,6 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - ceph_add_cap_releases(mdsc, s); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) ceph_send_cap_releases(mdsc, s); @@ -3390,11 +3423,13 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; INIT_LIST_HEAD(&mdsc->snap_empty); spin_lock_init(&mdsc->snap_empty_lock); mdsc->last_tid = 0; + mdsc->oldest_tid = 0; mdsc->request_tree = RB_ROOT; INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; @@ -3402,7 +3437,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); - mdsc->cap_flush_seq = 0; + mdsc->last_cap_flush_tid = 1; + mdsc->cap_flush_tree = RB_ROOT; INIT_LIST_HEAD(&mdsc->cap_dirty); INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); mdsc->num_cap_flushing = 0; @@ -3414,6 +3450,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ceph_caps_init(mdsc); ceph_adjust_min_caps(mdsc, fsc->min_caps); + init_rwsem(&mdsc->pool_perm_rwsem); + mdsc->pool_perm_tree = RB_ROOT; + return 0; } @@ -3423,8 +3462,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; - struct ceph_fs_client *fsc = mdsc->fsc; mutex_lock(&mdsc->mutex); if (__get_oldest_req(mdsc)) { @@ -3432,7 +3471,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) dout("wait_requests waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, - fsc->client->options->mount_timeout * HZ); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); @@ -3485,7 +3524,8 @@ restart: nextreq = rb_entry(n, struct ceph_mds_request, r_node); else nextreq = NULL; - if ((req->r_op & CEPH_MDS_OP_WRITE)) { + if (req->r_op != CEPH_MDS_OP_SETFILELOCK && + (req->r_op & CEPH_MDS_OP_WRITE)) { /* write op */ ceph_mdsc_get_request(req); if (nextreq) @@ -3513,7 +3553,7 @@ restart: void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { - u64 want_tid, want_flush; + u64 want_tid, want_flush, want_snap; if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) return; @@ -3525,13 +3565,18 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) ceph_flush_dirty_caps(mdsc); spin_lock(&mdsc->cap_dirty_lock); - want_flush = mdsc->cap_flush_seq; + want_flush = mdsc->last_cap_flush_tid; spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); + down_read(&mdsc->snap_rwsem); + want_snap = mdsc->last_snap_seq; + up_read(&mdsc->snap_rwsem); + + dout("sync want tid %lld flush_seq %lld snap_seq %lld\n", + want_tid, want_flush, want_snap); wait_unsafe_requests(mdsc, want_tid); - wait_caps_flush(mdsc, want_flush); + wait_caps_flush(mdsc, want_flush, want_snap); } /* @@ -3549,10 +3594,9 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc) */ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { + struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_session *session; int i; - struct ceph_fs_client *fsc = mdsc->fsc; - unsigned long timeout = fsc->client->options->mount_timeout * HZ; dout("close_sessions\n"); @@ -3573,7 +3617,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) dout("waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), - timeout); + ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining sessions */ mutex_lock(&mdsc->mutex); @@ -3607,6 +3651,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); ceph_caps_finalize(mdsc); + ceph_pool_perm_destroy(mdsc); } void ceph_mdsc_destroy(struct ceph_fs_client *fsc) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 1875b5d985c6..762757e6cebf 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -139,7 +139,6 @@ struct ceph_mds_session { int s_cap_reconnect; int s_readonly; struct list_head s_cap_releases; /* waiting cap_release messages */ - struct list_head s_cap_releases_done; /* ready to send */ struct ceph_cap *s_cap_iterator; /* protected by mutex */ @@ -228,7 +227,7 @@ struct ceph_mds_request { int r_err; bool r_aborted; - unsigned long r_timeout; /* optional. jiffies */ + unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ unsigned long r_request_started; /* start time for mds request only, used to measure lease durations */ @@ -254,12 +253,21 @@ struct ceph_mds_request { bool r_got_unsafe, r_got_safe, r_got_result; bool r_did_prepopulate; + long long r_dir_release_cnt; + long long r_dir_ordered_cnt; + int r_readdir_cache_idx; u32 r_readdir_offset; struct ceph_cap_reservation r_caps_reservation; int r_num_caps; }; +struct ceph_pool_perm { + struct rb_node node; + u32 pool; + int perm; +}; + /* * mds client state */ @@ -284,12 +292,15 @@ struct ceph_mds_client { * references (implying they contain no inodes with caps) that * should be destroyed. */ + u64 last_snap_seq; struct rw_semaphore snap_rwsem; struct rb_root snap_realms; struct list_head snap_empty; spinlock_t snap_empty_lock; /* protect snap_empty */ u64 last_tid; /* most recent mds request */ + u64 oldest_tid; /* oldest incomplete mds request, + excluding setfilelock requests */ struct rb_root request_tree; /* pending mds requests */ struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ @@ -298,7 +309,8 @@ struct ceph_mds_client { struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; - u64 cap_flush_seq; + u64 last_cap_flush_tid; + struct rb_root cap_flush_tree; struct list_head cap_dirty; /* inodes with dirty caps */ struct list_head cap_dirty_migrating; /* ...that are migration... */ int num_cap_flushing; /* # caps we are flushing */ @@ -328,6 +340,9 @@ struct ceph_mds_client { spinlock_t dentry_lru_lock; struct list_head dentry_lru; int num_dentry; + + struct rw_semaphore pool_perm_rwsem; + struct rb_root pool_perm_tree; }; extern const char *ceph_mds_op_name(int op); @@ -379,8 +394,6 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } -extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session); extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index a97e39f09ba6..233d906aec02 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -296,7 +296,7 @@ static int cmpu64_rev(const void *a, const void *b) } -static struct ceph_snap_context *empty_snapc; +struct ceph_snap_context *ceph_empty_snapc; /* * build the snap context for a given realm. @@ -338,9 +338,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) return 0; } - if (num == 0 && realm->seq == empty_snapc->seq) { - ceph_get_snap_context(empty_snapc); - snapc = empty_snapc; + if (num == 0 && realm->seq == ceph_empty_snapc->seq) { + ceph_get_snap_context(ceph_empty_snapc); + snapc = ceph_empty_snapc; goto done; } @@ -436,6 +436,14 @@ static int dup_array(u64 **dst, __le64 *src, u32 num) return 0; } +static bool has_new_snaps(struct ceph_snap_context *o, + struct ceph_snap_context *n) +{ + if (n->num_snaps == 0) + return false; + /* snaps are in descending order */ + return n->snaps[0] > o->seq; +} /* * When a snapshot is applied, the size/mtime inode metadata is queued @@ -455,6 +463,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) { struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; + struct ceph_snap_context *old_snapc, *new_snapc; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -467,6 +476,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); + old_snapc = ci->i_head_snapc; + new_snapc = ci->i_snap_realm->cached_context; + /* * If there is a write in progress, treat that as a dirty Fw, * even though it hasn't completed yet; by the time we finish @@ -481,76 +493,95 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) writes in progress now were started before the previous cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); - kfree(capsnap); - } else if (ci->i_snap_realm->cached_context == empty_snapc) { - dout("queue_cap_snap %p empty snapc\n", inode); - kfree(capsnap); - } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| - CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { - struct ceph_snap_context *snapc = ci->i_head_snapc; - - /* - * if we are a sync write, we may need to go to the snaprealm - * to get the current snapc. - */ - if (!snapc) - snapc = ci->i_snap_realm->cached_context; + goto update_snapc; + } + if (ci->i_wrbuffer_ref_head == 0 && + !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { + dout("queue_cap_snap %p nothing dirty|writing\n", inode); + goto update_snapc; + } - dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", - inode, capsnap, snapc, ceph_cap_string(dirty)); - ihold(inode); + BUG_ON(!old_snapc); - atomic_set(&capsnap->nref, 1); - capsnap->ci = ci; - INIT_LIST_HEAD(&capsnap->ci_item); - INIT_LIST_HEAD(&capsnap->flushing_item); - - capsnap->follows = snapc->seq; - capsnap->issued = __ceph_caps_issued(ci, NULL); - capsnap->dirty = dirty; - - capsnap->mode = inode->i_mode; - capsnap->uid = inode->i_uid; - capsnap->gid = inode->i_gid; - - if (dirty & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); - capsnap->xattr_blob = - ceph_buffer_get(ci->i_xattrs.blob); - capsnap->xattr_version = ci->i_xattrs.version; - } else { - capsnap->xattr_blob = NULL; - capsnap->xattr_version = 0; + /* + * There is no need to send FLUSHSNAP message to MDS if there is + * no new snapshot. But when there is dirty pages or on-going + * writes, we still need to create cap_snap. cap_snap is needed + * by the write path and page writeback path. + * + * also see ceph_try_drop_cap_snap() + */ + if (has_new_snaps(old_snapc, new_snapc)) { + if (dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR)) + capsnap->need_flush = true; + } else { + if (!(used & CEPH_CAP_FILE_WR) && + ci->i_wrbuffer_ref_head == 0) { + dout("queue_cap_snap %p " + "no new_snap|dirty_page|writing\n", inode); + goto update_snapc; } + } - capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; - - /* dirty page count moved from _head to this cap_snap; - all subsequent writes page dirties occur _after_ this - snapshot. */ - capsnap->dirty_pages = ci->i_wrbuffer_ref_head; - ci->i_wrbuffer_ref_head = 0; - capsnap->context = snapc; - ci->i_head_snapc = - ceph_get_snap_context(ci->i_snap_realm->cached_context); - dout(" new snapc is %p\n", ci->i_head_snapc); - list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); - - if (used & CEPH_CAP_FILE_WR) { - dout("queue_cap_snap %p cap_snap %p snapc %p" - " seq %llu used WR, now pending\n", inode, - capsnap, snapc, snapc->seq); - capsnap->writing = 1; - } else { - /* note mtime, size NOW. */ - __ceph_finish_cap_snap(ci, capsnap); - } + dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n", + inode, capsnap, old_snapc, ceph_cap_string(dirty), + capsnap->need_flush ? "" : "no_flush"); + ihold(inode); + + atomic_set(&capsnap->nref, 1); + capsnap->ci = ci; + INIT_LIST_HEAD(&capsnap->ci_item); + INIT_LIST_HEAD(&capsnap->flushing_item); + + capsnap->follows = old_snapc->seq; + capsnap->issued = __ceph_caps_issued(ci, NULL); + capsnap->dirty = dirty; + + capsnap->mode = inode->i_mode; + capsnap->uid = inode->i_uid; + capsnap->gid = inode->i_gid; + + if (dirty & CEPH_CAP_XATTR_EXCL) { + __ceph_build_xattrs_blob(ci); + capsnap->xattr_blob = + ceph_buffer_get(ci->i_xattrs.blob); + capsnap->xattr_version = ci->i_xattrs.version; } else { - dout("queue_cap_snap %p nothing dirty|writing\n", inode); - kfree(capsnap); + capsnap->xattr_blob = NULL; + capsnap->xattr_version = 0; } + capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE; + + /* dirty page count moved from _head to this cap_snap; + all subsequent writes page dirties occur _after_ this + snapshot. */ + capsnap->dirty_pages = ci->i_wrbuffer_ref_head; + ci->i_wrbuffer_ref_head = 0; + capsnap->context = old_snapc; + list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); + old_snapc = NULL; + + if (used & CEPH_CAP_FILE_WR) { + dout("queue_cap_snap %p cap_snap %p snapc %p" + " seq %llu used WR, now pending\n", inode, + capsnap, old_snapc, old_snapc->seq); + capsnap->writing = 1; + } else { + /* note mtime, size NOW. */ + __ceph_finish_cap_snap(ci, capsnap); + } + capsnap = NULL; + +update_snapc: + if (ci->i_head_snapc) { + ci->i_head_snapc = ceph_get_snap_context(new_snapc); + dout(" new snapc is %p\n", new_snapc); + } spin_unlock(&ci->i_ceph_lock); + + kfree(capsnap); + ceph_put_snap_context(old_snapc); } /* @@ -699,6 +730,8 @@ more: /* queue realm for cap_snap creation */ list_add(&realm->dirty_item, &dirty_realms); + if (realm->seq > mdsc->last_snap_seq) + mdsc->last_snap_seq = realm->seq; invalidate = 1; } else if (!realm->cached_context) { @@ -964,14 +997,14 @@ out: int __init ceph_snap_init(void) { - empty_snapc = ceph_create_snap_context(0, GFP_NOFS); - if (!empty_snapc) + ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS); + if (!ceph_empty_snapc) return -ENOMEM; - empty_snapc->seq = 1; + ceph_empty_snapc->seq = 1; return 0; } void ceph_snap_exit(void) { - ceph_put_snap_context(empty_snapc); + ceph_put_snap_context(ceph_empty_snapc); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9905374078..d1c833c321b9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -134,10 +134,12 @@ enum { Opt_noino32, Opt_fscache, Opt_nofscache, + Opt_poolperm, + Opt_nopoolperm, #ifdef CONFIG_CEPH_FS_POSIX_ACL Opt_acl, #endif - Opt_noacl + Opt_noacl, }; static match_table_t fsopt_tokens = { @@ -165,6 +167,8 @@ static match_table_t fsopt_tokens = { {Opt_noino32, "noino32"}, {Opt_fscache, "fsc"}, {Opt_nofscache, "nofsc"}, + {Opt_poolperm, "poolperm"}, + {Opt_nopoolperm, "nopoolperm"}, #ifdef CONFIG_CEPH_FS_POSIX_ACL {Opt_acl, "acl"}, #endif @@ -268,6 +272,13 @@ static int parse_fsopt_token(char *c, void *private) case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; + case Opt_poolperm: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; + printk ("pool perm"); + break; + case Opt_nopoolperm: + fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= MS_POSIXACL; @@ -436,6 +447,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) seq_puts(m, ",fsc"); + if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) + seq_puts(m, ",nopoolperm"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & MS_POSIXACL) @@ -609,6 +622,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; @@ -634,6 +648,10 @@ static int __init init_caches(void) SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (ceph_cap_cachep == NULL) goto bad_cap; + ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + if (ceph_cap_flush_cachep == NULL) + goto bad_cap_flush; ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); @@ -652,6 +670,8 @@ static int __init init_caches(void) bad_file: kmem_cache_destroy(ceph_dentry_cachep); bad_dentry: + kmem_cache_destroy(ceph_cap_flush_cachep); +bad_cap_flush: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); @@ -668,6 +688,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); @@ -729,7 +750,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; req->r_started = started; - req->r_timeout = fsc->client->options->mount_timeout * HZ; + req->r_timeout = fsc->client->options->mount_timeout; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fa20e1318939..860cc016e70d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -35,6 +35,7 @@ #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ +#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \ CEPH_MOUNT_OPT_DCACHE) @@ -121,11 +122,21 @@ struct ceph_cap { struct rb_node ci_node; /* per-ci cap tree */ struct ceph_mds_session *session; struct list_head session_caps; /* per-session caplist */ - int mds; u64 cap_id; /* unique cap id (mds provided) */ - int issued; /* latest, from the mds */ - int implemented; /* implemented superset of issued (for revocation) */ - int mds_wanted; + union { + /* in-use caps */ + struct { + int issued; /* latest, from the mds */ + int implemented; /* implemented superset of + issued (for revocation) */ + int mds, mds_wanted; + }; + /* caps to release */ + struct { + u64 cap_ino; + int queue_release; + }; + }; u32 seq, issue_seq, mseq; u32 cap_gen; /* active/stale cycle */ unsigned long last_used; @@ -163,6 +174,7 @@ struct ceph_cap_snap { int writing; /* a sync write is still in progress */ int dirty_pages; /* dirty pages awaiting writeback */ bool inline_data; + bool need_flush; }; static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) @@ -174,6 +186,17 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) } } +struct ceph_cap_flush { + u64 tid; + int caps; + bool kick; + struct rb_node g_node; // global + union { + struct rb_node i_node; // inode + struct list_head list; + }; +}; + /* * The frag tree describes how a directory is fragmented, potentially across * multiple metadata servers. It is also used to indicate points where @@ -259,9 +282,9 @@ struct ceph_inode_info { u32 i_time_warp_seq; unsigned i_ceph_flags; - int i_ordered_count; - atomic_t i_release_count; - atomic_t i_complete_count; + atomic64_t i_release_count; + atomic64_t i_ordered_count; + atomic64_t i_complete_seq[2]; struct ceph_dir_layout i_dir_layout; struct ceph_file_layout i_layout; @@ -283,11 +306,11 @@ struct ceph_inode_info { struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ struct list_head i_dirty_item, i_flushing_item; - u64 i_cap_flush_seq; /* we need to track cap writeback on a per-cap-bit basis, to allow * overlapping, pipelined cap flushes to the mds. we can probably * reduce the tid to 8 bits if we're concerned about inode size. */ - u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS]; + struct ceph_cap_flush *i_prealloc_cap_flush; + struct rb_root i_cap_flush_tree; wait_queue_head_t i_cap_wq; /* threads waiting on a capability */ unsigned long i_hold_caps_min; /* jiffies */ unsigned long i_hold_caps_max; /* jiffies */ @@ -438,36 +461,46 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* * Ceph inode. */ -#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */ -#define CEPH_I_NODELAY 4 /* do not delay cap release */ -#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ -#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ +#define CEPH_I_DIR_ORDERED (1 << 0) /* dentries in dir are ordered */ +#define CEPH_I_NODELAY (1 << 1) /* do not delay cap release */ +#define CEPH_I_FLUSH (1 << 2) /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH (1 << 3) /* do not flush dirty caps */ +#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ +#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ +#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ + static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, - int release_count, int ordered_count) + long long release_count, + long long ordered_count) { - atomic_set(&ci->i_complete_count, release_count); - if (ci->i_ordered_count == ordered_count) - ci->i_ceph_flags |= CEPH_I_DIR_ORDERED; - else - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; + smp_mb__before_atomic(); + atomic64_set(&ci->i_complete_seq[0], release_count); + atomic64_set(&ci->i_complete_seq[1], ordered_count); } static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) { - atomic_inc(&ci->i_release_count); + atomic64_inc(&ci->i_release_count); +} + +static inline void __ceph_dir_clear_ordered(struct ceph_inode_info *ci) +{ + atomic64_inc(&ci->i_ordered_count); } static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) { - return atomic_read(&ci->i_complete_count) == - atomic_read(&ci->i_release_count); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count); } static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci) { - return __ceph_dir_is_complete(ci) && - (ci->i_ceph_flags & CEPH_I_DIR_ORDERED); + return atomic64_read(&ci->i_complete_seq[0]) == + atomic64_read(&ci->i_release_count) && + atomic64_read(&ci->i_complete_seq[1]) == + atomic64_read(&ci->i_ordered_count); } static inline void ceph_dir_clear_complete(struct inode *inode) @@ -477,20 +510,13 @@ static inline void ceph_dir_clear_complete(struct inode *inode) static inline void ceph_dir_clear_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - spin_lock(&ci->i_ceph_lock); - ci->i_ordered_count++; - ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED; - spin_unlock(&ci->i_ceph_lock); + __ceph_dir_clear_ordered(ceph_inode(inode)); } static inline bool ceph_dir_is_complete_ordered(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - bool ret; - spin_lock(&ci->i_ceph_lock); - ret = __ceph_dir_is_complete_ordered(ci); - spin_unlock(&ci->i_ceph_lock); + bool ret = __ceph_dir_is_complete_ordered(ceph_inode(inode)); + smp_rmb(); return ret; } @@ -552,7 +578,10 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) { return ci->i_dirty_caps | ci->i_flushing_caps; } -extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); +extern struct ceph_cap_flush *ceph_alloc_cap_flush(void); +extern void ceph_free_cap_flush(struct ceph_cap_flush *cf); +extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, + struct ceph_cap_flush **pcf); extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci, struct ceph_cap *ocap, int mask); @@ -606,16 +635,20 @@ struct ceph_file_info { unsigned offset; /* offset of last chunk, adjusted for . and .. */ unsigned next_offset; /* offset of next chunk (last_name's + 1) */ char *last_name; /* last entry in previous chunk */ - struct dentry *dentry; /* next dentry (for dcache readdir) */ - int dir_release_count; - int dir_ordered_count; + long long dir_release_count; + long long dir_ordered_count; + int readdir_cache_idx; /* used for -o dirstat read() on directory thing */ char *dir_info; int dir_info_len; }; - +struct ceph_readdir_cache_control { + struct page *page; + struct dentry **dentries; + int index; +}; /* * A "snap realm" describes a subset of the file hierarchy sharing @@ -687,6 +720,7 @@ static inline int default_congestion_kb(void) /* snap.c */ +extern struct ceph_snap_context *ceph_empty_snapc; struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino); extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc, @@ -713,8 +747,8 @@ extern void ceph_snap_exit(void); static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) { return !list_empty(&ci->i_cap_snaps) && - list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap, - ci_item)->writing; + list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap, + ci_item)->writing; } /* inode.c */ @@ -838,12 +872,12 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); extern int ceph_is_any_caps(struct inode *inode); -extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino, - u64 cap_id, u32 migrate_seq, u32 issue_seq); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); extern int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync); +extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, @@ -879,6 +913,9 @@ extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); /* addr.c */ extern const struct address_space_operations ceph_aops; extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); +extern int ceph_uninline_data(struct file *filp, struct page *locked_page); +extern int ceph_pool_perm_check(struct ceph_inode_info *ci, int need); +extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); /* file.c */ extern const struct file_operations ceph_file_fops; @@ -890,7 +927,6 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -int ceph_uninline_data(struct file *filp, struct page *locked_page); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; @@ -911,6 +947,7 @@ extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry); +extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl); /* * our d_ops vary depending on whether the inode is live, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index cd7ffad4041d..819163d8313b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -911,6 +911,8 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int dirty = 0; @@ -920,6 +922,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, char *newval = NULL; struct ceph_inode_xattr *xattr = NULL; int required_blob_size; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -948,12 +951,27 @@ int __ceph_setxattr(struct dentry *dentry, const char *name, if (!xattr) goto out; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + goto out; + spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, name_len, val_len); @@ -966,7 +984,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -978,21 +996,28 @@ retry: flags, value ? 1 : -1, &xattr); if (!err) { - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; } spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); err = ceph_sync_setxattr(dentry, name, value, size, flags); out: + ceph_free_cap_flush(prealloc_cf); kfree(newname); kfree(newval); kfree(xattr); @@ -1044,10 +1069,13 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int required_blob_size; int dirty; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -1060,14 +1088,29 @@ int __ceph_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) goto do_sync_unlocked; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); + __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, 0, 0); @@ -1080,7 +1123,7 @@ retry: dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -1090,18 +1133,24 @@ retry: err = __remove_xattr_by_name(ceph_inode(inode), name); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); + ceph_free_cap_flush(prealloc_cf); err = ceph_send_removexattr(dentry, name); -out: return err; } diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index d6f7a76a1f5b..f829fe963f5b 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -79,7 +79,7 @@ void coda_sysctl_clean(void); static inline struct coda_inode_info *ITOC(struct inode *inode) { - return list_entry(inode, struct coda_inode_info, vfs_inode); + return container_of(inode, struct coda_inode_info, vfs_inode); } static __inline__ struct CodaFid *coda_i2f(struct inode *inode) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b8e2f091f5b..48851f6ea6ec 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ) /* 'X' - originally XFS but some now in the VFS */ COMPATIBLE_IOCTL(FIFREEZE) COMPATIBLE_IOCTL(FITHAW) +COMPATIBLE_IOCTL(FITRIM) COMPATIBLE_IOCTL(KDGETKEYCODE) COMPATIBLE_IOCTL(KDSETKEYCODE) COMPATIBLE_IOCTL(KDGKBTYPE) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 8d89f5fd0331..eae87575e681 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -236,7 +236,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) if (dentry) { spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 537356742091..a8f3b589a2df 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -129,8 +129,6 @@ void configfs_release_fs(void) } -static struct kobject *config_kobj; - static int __init configfs_init(void) { int err = -ENOMEM; @@ -141,8 +139,8 @@ static int __init configfs_init(void) if (!configfs_dir_cachep) goto out; - config_kobj = kobject_create_and_add("config", kernel_kobj); - if (!config_kobj) + err = sysfs_create_mount_point(kernel_kobj, "config"); + if (err) goto out2; err = register_filesystem(&configfs_fs_type); @@ -152,7 +150,7 @@ static int __init configfs_init(void) return 0; out3: pr_err("Unable to register filesystem!\n"); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); out2: kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; @@ -163,7 +161,7 @@ out: static void __exit configfs_exit(void) { unregister_filesystem(&configfs_fs_type); - kobject_put(config_kobj); + sysfs_remove_mount_point(kernel_kobj, "config"); kmem_cache_destroy(configfs_dir_cachep); configfs_dir_cachep = NULL; } diff --git a/fs/coredump.c b/fs/coredump.c index e52e0064feac..c5ecde6f3eed 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -140,7 +140,7 @@ static int cn_print_exe_file(struct core_name *cn) goto put_exe_file; } - path = d_path(&exe_file->f_path, pathbuf, PATH_MAX); + path = file_path(exe_file, pathbuf, PATH_MAX); if (IS_ERR(path)) { ret = PTR_ERR(path); goto free_buf; @@ -155,7 +155,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, } if (iov_iter_rw(iter) == WRITE) - len = copy_from_iter(addr, max - pos, iter); + len = copy_from_iter_nocache(addr, max - pos, iter); else if (!hole) len = copy_to_iter(addr, max - pos, iter); else @@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, } /* Protects against truncate */ - inode_dio_begin(inode); + if (!(flags & DIO_SKIP_DIO_COUNT)) + inode_dio_begin(inode); retval = dax_io(inode, iter, pos, end, get_block, &bh); @@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, if ((retval > 0) && end_io) end_io(iocb, pos, retval, bh.b_private); - inode_dio_end(inode); + if (!(flags & DIO_SKIP_DIO_COUNT)) + inode_dio_end(inode); out: return retval; } diff --git a/fs/dcache.c b/fs/dcache.c index 592c4b582495..5c8ea15e73a5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +776,9 @@ repeat: if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; @@ -1673,7 +1676,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | - DCACHE_OP_DELETE )); + DCACHE_OP_DELETE | + DCACHE_OP_SELECT_INODE)); dentry->d_op = op; if (!op) return; @@ -1689,6 +1693,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) dentry->d_flags |= DCACHE_OP_DELETE; if (op->d_prune) dentry->d_flags |= DCACHE_OP_PRUNE; + if (op->d_select_inode) + dentry->d_flags |= DCACHE_OP_SELECT_INODE; } EXPORT_SYMBOL(d_set_d_op); @@ -2927,17 +2933,6 @@ restart: vfsmnt = &mnt->mnt; continue; } - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || - dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, - dentry->d_name.name); - } if (!error) error = is_mounted(vfsmnt) ? 1 : 2; break; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7eaec88ea970..c711be8d6a3c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -44,11 +44,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb) return inode; } -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - struct debugfs_mount_opts { kuid_t uid; kgid_t gid; @@ -522,7 +517,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (debugfs_positive(dentry)) { + if (simple_positive(dentry)) { dget(dentry); if (d_is_dir(dentry)) ret = simple_rmdir(d_inode(parent), dentry); @@ -602,7 +597,7 @@ void debugfs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!debugfs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -623,7 +618,7 @@ void debugfs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * debugfs_positive() check. + * simple_positive() check. */ goto loop; } @@ -716,20 +711,17 @@ bool debugfs_initialized(void) } EXPORT_SYMBOL_GPL(debugfs_initialized); - -static struct kobject *debug_kobj; - static int __init debugfs_init(void) { int retval; - debug_kobj = kobject_create_and_add("debug", kernel_kobj); - if (!debug_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "debug"); + if (retval) + return retval; retval = register_filesystem(&debug_fs_type); if (retval) - kobject_put(debug_kobj); + sysfs_remove_mount_point(kernel_kobj, "debug"); else debugfs_registered = true; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 72afcc629d7b..feef8a9c4de7 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; switch (cmd) { - case FITRIM: case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: case FS_IOC32_GETVERSION: diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 4deb0b05b011..e5bb2abf77f9 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -44,12 +44,6 @@ static inline void exofs_put_page(struct page *page) page_cache_release(page); } -/* Accesses dir's inode->i_size must be called under inode lock */ -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr) { loff_t last_byte = inode->i_size; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 796b491e6978..0c6638b40f21 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -70,11 +70,6 @@ static inline void ext2_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index aadb72828834..2553aa8b608d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -504,7 +504,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1089,7 +1089,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1283,7 +1283,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 41f8e55afcd1..cecf9aa10811 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1323,7 +1323,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1344,14 +1344,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we @@ -4344,7 +4353,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cb8451246b30..1346cfa355d0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case EXT4_IOC_MOVE_EXT: - case FITRIM: case EXT4_IOC_RESIZE_FS: case EXT4_IOC_PRECACHE_EXTENTS: case EXT4_IOC_SET_ENCRYPTION_POLICY: diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f6aedf88da43..34b610ea5030 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4816,18 +4816,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e42102..6163ad21cb0e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } @@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5c787647afe2..58987b5c514b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -452,7 +452,7 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); if (ext4_error_ratelimit(inode->i_sb)) { - path = d_path(&(file->f_path), pathname, sizeof(pathname)); + path = file_path(file, pathname, sizeof(pathname)); if (IS_ERR(path)) path = "(unknown)"; va_start(args, fmt); diff --git a/fs/file.c b/fs/file.c index 93c5f89c248b..6c672ad329e9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr) spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); + + /* make sure all __fd_install() have seen resize_in_progress + * or have finished their rcu_read_lock_sched() section. + */ + if (atomic_read(&files->count) > 1) + synchronize_sched(); + spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; @@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr) __free_fdtable(new_fdt); return -EMFILE; } - /* - * Check again since another task may have expanded the fd table while - * we dropped the lock - */ cur_fdt = files_fdtable(files); - if (nr >= cur_fdt->max_fds) { - /* Continue as planned */ - copy_fdtable(new_fdt, cur_fdt); - rcu_assign_pointer(files->fdt, new_fdt); - if (cur_fdt != &files->fdtab) - call_rcu(&cur_fdt->rcu, free_fdtable_rcu); - } else { - /* Somebody else expanded, so undo our attempt */ - __free_fdtable(new_fdt); - } + BUG_ON(nr < cur_fdt->max_fds); + copy_fdtable(new_fdt, cur_fdt); + rcu_assign_pointer(files->fdt, new_fdt); + if (cur_fdt != &files->fdtab) + call_rcu(&cur_fdt->rcu, free_fdtable_rcu); + /* coupled with smp_rmb() in __fd_install() */ + smp_wmb(); return 1; } @@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr) * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_files(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) { struct fdtable *fdt; + int expanded = 0; +repeat: fdt = files_fdtable(files); /* Do we need to expand? */ if (nr < fdt->max_fds) - return 0; + return expanded; /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; + if (unlikely(files->resize_in_progress)) { + spin_unlock(&files->file_lock); + expanded = 1; + wait_event(files->resize_wait, !files->resize_in_progress); + spin_lock(&files->file_lock); + goto repeat; + } + /* All good, so we try */ - return expand_fdtable(files, nr); + files->resize_in_progress = true; + expanded = expand_fdtable(files, nr); + files->resize_in_progress = false; + + wake_up_all(&files->resize_wait); + return expanded; } static inline void __set_close_on_exec(int fd, struct fdtable *fdt) @@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->resize_in_progress = false; + init_waitqueue_head(&newf->resize_wait); newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; @@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) { struct fdtable *fdt; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); + + might_sleep(); + rcu_read_lock_sched(); + + while (unlikely(files->resize_in_progress)) { + rcu_read_unlock_sched(); + wait_event(files->resize_wait, !files->resize_in_progress); + rcu_read_lock_sched(); + } + /* coupled with smp_wmb() in expand_fdtable() */ + smp_rmb(); + fdt = rcu_dereference_sched(files->fdt); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + rcu_read_unlock_sched(); } void fd_install(unsigned int fd, struct file *file) @@ -635,11 +664,17 @@ static struct file *__fget(unsigned int fd, fmode_t mask) struct file *file; rcu_read_lock(); +loop: file = fcheck_files(files, fd); if (file) { - /* File object ref couldn't be taken */ - if ((file->f_mode & mask) || !get_file_rcu(file)) + /* File object ref couldn't be taken. + * dup2() atomicity guarantee is the reason + * we loop to catch the new file (or NULL pointer) + */ + if (file->f_mode & mask) file = NULL; + else if (!get_file_rcu(file)) + goto loop; } rcu_read_unlock(); diff --git a/fs/file_table.c b/fs/file_table.c index 294174dcc226..7f9d407c7595 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,6 @@ #include <linux/cdev.h> #include <linux/fsnotify.h> #include <linux/sysctl.h> -#include <linux/lglock.h> #include <linux/percpu_counter.h> #include <linux/percpu.h> #include <linux/hardirq.h> diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 99c7f0a37af4..484b32d3234a 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -61,13 +61,6 @@ const struct file_operations vxfs_dir_operations = { .iterate = vxfs_readdir, }; - -static inline u_long -dir_pages(struct inode *inode) -{ - return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - static inline u_long dir_blocks(struct inode *ip) { diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 89acec742e0b..d403c69bee08 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -327,7 +327,8 @@ static int fscache_alloc_object(struct fscache_cache *cache, object_already_extant: ret = -ENOBUFS; - if (fscache_object_is_dead(object)) { + if (fscache_object_is_dying(object) || + fscache_cache_is_broken(object)) { spin_unlock(&cookie->lock); goto error; } @@ -671,7 +672,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) if (!op) return -ENOMEM; - fscache_operation_init(op, NULL, NULL); + fscache_operation_init(op, NULL, NULL, NULL); op->flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -695,8 +696,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) /* the work queue now carries its own ref on the object */ spin_unlock(&cookie->lock); - ret = fscache_wait_for_operation_activation(object, op, - NULL, NULL, NULL); + ret = fscache_wait_for_operation_activation(object, op, NULL, NULL); if (ret == 0) { /* ask the cache to honour the operation */ ret = object->cache->ops->check_consistency(op); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 7872a62ef30c..97ec45110957 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -124,8 +124,7 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *, - void (*)(struct fscache_operation *)); +extern int fscache_cancel_op(struct fscache_operation *, bool); extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); @@ -138,8 +137,7 @@ extern int fscache_wait_for_deferred_lookup(struct fscache_cookie *); extern int fscache_wait_for_operation_activation(struct fscache_object *, struct fscache_operation *, atomic_t *, - atomic_t *, - void (*)(struct fscache_operation *)); + atomic_t *); extern void fscache_invalidate_writes(struct fscache_cookie *); /* @@ -164,6 +162,7 @@ extern atomic_t fscache_n_op_pend; extern atomic_t fscache_n_op_run; extern atomic_t fscache_n_op_enqueue; extern atomic_t fscache_n_op_deferred_release; +extern atomic_t fscache_n_op_initialised; extern atomic_t fscache_n_op_release; extern atomic_t fscache_n_op_gc; extern atomic_t fscache_n_op_cancelled; @@ -271,6 +270,11 @@ extern atomic_t fscache_n_cop_write_page; extern atomic_t fscache_n_cop_uncache_page; extern atomic_t fscache_n_cop_dissociate_pages; +extern atomic_t fscache_n_cache_no_space_reject; +extern atomic_t fscache_n_cache_stale_objects; +extern atomic_t fscache_n_cache_retired_objects; +extern atomic_t fscache_n_cache_culled_objects; + static inline void fscache_stat(atomic_t *stat) { atomic_inc(stat); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index da032daf0e0d..9e792e30f4db 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -328,6 +328,17 @@ void fscache_object_init(struct fscache_object *object, EXPORT_SYMBOL(fscache_object_init); /* + * Mark the object as no longer being live, making sure that we synchronise + * against op submission. + */ +static inline void fscache_mark_object_dead(struct fscache_object *object) +{ + spin_lock(&object->lock); + clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + spin_unlock(&object->lock); +} + +/* * Abort object initialisation before we start it. */ static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *object, @@ -610,6 +621,8 @@ static const struct fscache_state *fscache_lookup_failure(struct fscache_object object->cache->ops->lookup_complete(object); fscache_stat_d(&fscache_n_cop_lookup_complete); + set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags); + cookie = object->cookie; set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) @@ -629,7 +642,7 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob _enter("{OBJ%x,%d,%d},%d", object->debug_id, object->n_ops, object->n_children, event); - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); object->oob_event_mask = 0; if (list_empty(&object->dependents) && @@ -948,7 +961,8 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj if (!op) goto nomem; - fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + fscache_operation_init(op, object->cache->ops->invalidate_object, + NULL, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -974,13 +988,13 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj return transit_to(UPDATE_OBJECT); nomem: - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); fscache_unuse_cookie(object); _leave(" [ENOMEM]"); return transit_to(KILL_OBJECT); submit_op_failed: - clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); + fscache_mark_object_dead(object); spin_unlock(&cookie->lock); fscache_unuse_cookie(object); kfree(op); @@ -1016,3 +1030,50 @@ static const struct fscache_state *fscache_update_object(struct fscache_object * _leave(""); return transit_to(WAIT_FOR_CMD); } + +/** + * fscache_object_retrying_stale - Note retrying stale object + * @object: The object that will be retried + * + * Note that an object lookup found an on-disk object that was adjudged to be + * stale and has been deleted. The lookup will be retried. + */ +void fscache_object_retrying_stale(struct fscache_object *object) +{ + fscache_stat(&fscache_n_cache_no_space_reject); +} +EXPORT_SYMBOL(fscache_object_retrying_stale); + +/** + * fscache_object_mark_killed - Note that an object was killed + * @object: The object that was culled + * @why: The reason the object was killed. + * + * Note that an object was killed. Returns true if the object was + * already marked killed, false if it wasn't. + */ +void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why) +{ + if (test_and_set_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->flags)) { + pr_err("Error: Object already killed by cache [%s]\n", + object->cache->identifier); + return; + } + + switch (why) { + case FSCACHE_OBJECT_NO_SPACE: + fscache_stat(&fscache_n_cache_no_space_reject); + break; + case FSCACHE_OBJECT_IS_STALE: + fscache_stat(&fscache_n_cache_stale_objects); + break; + case FSCACHE_OBJECT_WAS_RETIRED: + fscache_stat(&fscache_n_cache_retired_objects); + break; + case FSCACHE_OBJECT_WAS_CULLED: + fscache_stat(&fscache_n_cache_culled_objects); + break; + } +} +EXPORT_SYMBOL(fscache_object_mark_killed); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e7b87a0e5185..de67745e1cd7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -20,6 +20,35 @@ atomic_t fscache_op_debug_id; EXPORT_SYMBOL(fscache_op_debug_id); +static void fscache_operation_dummy_cancel(struct fscache_operation *op) +{ +} + +/** + * fscache_operation_init - Do basic initialisation of an operation + * @op: The operation to initialise + * @release: The release function to assign + * + * Do basic initialisation of an operation. The caller must still set flags, + * object and processor if needed. + */ +void fscache_operation_init(struct fscache_operation *op, + fscache_operation_processor_t processor, + fscache_operation_cancel_t cancel, + fscache_operation_release_t release) +{ + INIT_WORK(&op->work, fscache_op_work_func); + atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; + op->debug_id = atomic_inc_return(&fscache_op_debug_id); + op->processor = processor; + op->cancel = cancel ?: fscache_operation_dummy_cancel; + op->release = release; + INIT_LIST_HEAD(&op->pend_link); + fscache_stat(&fscache_n_op_initialised); +} +EXPORT_SYMBOL(fscache_operation_init); + /** * fscache_enqueue_operation - Enqueue an operation for processing * @op: The operation to enqueue @@ -76,6 +105,43 @@ static void fscache_run_op(struct fscache_object *object, } /* + * report an unexpected submission + */ +static void fscache_report_unexpected_submission(struct fscache_object *object, + struct fscache_operation *op, + const struct fscache_state *ostate) +{ + static bool once_only; + struct fscache_operation *p; + unsigned n; + + if (once_only) + return; + once_only = true; + + kdebug("unexpected submission OP%x [OBJ%x %s]", + op->debug_id, object->debug_id, object->state->name); + kdebug("objstate=%s [%s]", object->state->name, ostate->name); + kdebug("objflags=%lx", object->flags); + kdebug("objevent=%lx [%lx]", object->events, object->event_mask); + kdebug("ops=%u inp=%u exc=%u", + object->n_ops, object->n_in_progress, object->n_exclusive); + + if (!list_empty(&object->pending_ops)) { + n = 0; + list_for_each_entry(p, &object->pending_ops, pend_link) { + ASSERTCMP(p->object, ==, object); + kdebug("%p %p", op->processor, op->release); + n++; + } + + kdebug("n=%u", n); + } + + dump_stack(); +} + +/* * submit an exclusive operation for an object * - other ops are excluded from running simultaneously with this one * - this gets any extra refs it needs on an op @@ -83,6 +149,8 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { + const struct fscache_state *ostate; + unsigned long flags; int ret; _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); @@ -95,8 +163,21 @@ int fscache_submit_exclusive_op(struct fscache_object *object, ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); + ostate = object->state; + smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; - if (fscache_object_is_active(object)) { + flags = READ_ONCE(object->flags); + if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { + fscache_stat(&fscache_n_op_rejected); + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; + } else if (unlikely(fscache_cache_is_broken(object))) { + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -EIO; + } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ @@ -118,7 +199,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); ret = 0; - } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { + } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ @@ -126,12 +207,15 @@ int fscache_submit_exclusive_op(struct fscache_object *object, list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; + } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; } else { - /* If we're in any other state, there must have been an I/O - * error of some nature. - */ - ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); - ret = -EIO; + fscache_report_unexpected_submission(object, op, ostate); + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; } spin_unlock(&object->lock); @@ -139,43 +223,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, } /* - * report an unexpected submission - */ -static void fscache_report_unexpected_submission(struct fscache_object *object, - struct fscache_operation *op, - const struct fscache_state *ostate) -{ - static bool once_only; - struct fscache_operation *p; - unsigned n; - - if (once_only) - return; - once_only = true; - - kdebug("unexpected submission OP%x [OBJ%x %s]", - op->debug_id, object->debug_id, object->state->name); - kdebug("objstate=%s [%s]", object->state->name, ostate->name); - kdebug("objflags=%lx", object->flags); - kdebug("objevent=%lx [%lx]", object->events, object->event_mask); - kdebug("ops=%u inp=%u exc=%u", - object->n_ops, object->n_in_progress, object->n_exclusive); - - if (!list_empty(&object->pending_ops)) { - n = 0; - list_for_each_entry(p, &object->pending_ops, pend_link) { - ASSERTCMP(p->object, ==, object); - kdebug("%p %p", op->processor, op->release); - n++; - } - - kdebug("n=%u", n); - } - - dump_stack(); -} - -/* * submit an operation for an object * - objects may be submitted only in the following states: * - during object creation (write ops may be submitted) @@ -187,6 +234,7 @@ int fscache_submit_op(struct fscache_object *object, struct fscache_operation *op) { const struct fscache_state *ostate; + unsigned long flags; int ret; _enter("{OBJ%x OP%x},{%u}", @@ -204,7 +252,17 @@ int fscache_submit_op(struct fscache_object *object, smp_rmb(); op->state = FSCACHE_OP_ST_PENDING; - if (fscache_object_is_active(object)) { + flags = READ_ONCE(object->flags); + if (unlikely(!(flags & BIT(FSCACHE_OBJECT_IS_LIVE)))) { + fscache_stat(&fscache_n_op_rejected); + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -ENOBUFS; + } else if (unlikely(fscache_cache_is_broken(object))) { + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + ret = -EIO; + } else if (flags & BIT(FSCACHE_OBJECT_IS_AVAILABLE)) { op->object = object; object->n_ops++; @@ -222,23 +280,21 @@ int fscache_submit_op(struct fscache_object *object, fscache_run_op(object, op); } ret = 0; - } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { + } else if (flags & BIT(FSCACHE_OBJECT_IS_LOOKED_UP)) { op->object = object; object->n_ops++; atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); ret = 0; - } else if (fscache_object_is_dying(object)) { - fscache_stat(&fscache_n_op_rejected); + } else if (flags & BIT(FSCACHE_OBJECT_KILLED_BY_CACHE)) { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; - } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { + } else { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); - op->state = FSCACHE_OP_ST_CANCELLED; - ret = -ENOBUFS; - } else { + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -293,9 +349,10 @@ void fscache_start_operations(struct fscache_object *object) * cancel an operation that's pending on an object */ int fscache_cancel_op(struct fscache_operation *op, - void (*do_cancel)(struct fscache_operation *)) + bool cancel_in_progress_op) { struct fscache_object *object = op->object; + bool put = false; int ret; _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); @@ -309,19 +366,37 @@ int fscache_cancel_op(struct fscache_operation *op, ret = -EBUSY; if (op->state == FSCACHE_OP_ST_PENDING) { ASSERT(!list_empty(&op->pend_link)); - fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); - if (do_cancel) - do_cancel(op); + put = true; + + fscache_stat(&fscache_n_op_cancelled); + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + ret = 0; + } else if (op->state == FSCACHE_OP_ST_IN_PROGRESS && cancel_in_progress_op) { + ASSERTCMP(object->n_in_progress, >, 0); + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + fscache_stat(&fscache_n_op_cancelled); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); - fscache_put_operation(op); ret = 0; } + if (put) + fscache_put_operation(op); spin_unlock(&object->lock); _leave(" = %d", ret); return ret; @@ -345,6 +420,7 @@ void fscache_cancel_all_ops(struct fscache_object *object) list_del_init(&op->pend_link); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) @@ -377,8 +453,12 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled) spin_lock(&object->lock); - op->state = cancelled ? - FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; + if (!cancelled) { + op->state = FSCACHE_OP_ST_COMPLETE; + } else { + op->cancel(op); + op->state = FSCACHE_OP_ST_CANCELLED; + } if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; @@ -409,9 +489,9 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED && + op->state != FSCACHE_OP_ST_COMPLETE, op->state, ==, FSCACHE_OP_ST_CANCELLED); - op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -419,37 +499,39 @@ void fscache_put_operation(struct fscache_operation *op) op->release(op); op->release = NULL; } + op->state = FSCACHE_OP_ST_DEAD; object = op->object; + if (likely(object)) { + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) + atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) + fscache_unuse_cookie(object); + + /* now... we may get called with the object spinlock held, so we + * complete the cleanup here only if we can immediately acquire the + * lock, and defer it otherwise */ + if (!spin_trylock(&object->lock)) { + _debug("defer put"); + fscache_stat(&fscache_n_op_deferred_release); + + cache = object->cache; + spin_lock(&cache->op_gc_list_lock); + list_add_tail(&op->pend_link, &cache->op_gc_list); + spin_unlock(&cache->op_gc_list_lock); + schedule_work(&cache->op_gc); + _leave(" [defer]"); + return; + } - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); - if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) - fscache_unuse_cookie(object); - - /* now... we may get called with the object spinlock held, so we - * complete the cleanup here only if we can immediately acquire the - * lock, and defer it otherwise */ - if (!spin_trylock(&object->lock)) { - _debug("defer put"); - fscache_stat(&fscache_n_op_deferred_release); + ASSERTCMP(object->n_ops, >, 0); + object->n_ops--; + if (object->n_ops == 0) + fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); - cache = object->cache; - spin_lock(&cache->op_gc_list_lock); - list_add_tail(&op->pend_link, &cache->op_gc_list); - spin_unlock(&cache->op_gc_list_lock); - schedule_work(&cache->op_gc); - _leave(" [defer]"); - return; + spin_unlock(&object->lock); } - ASSERTCMP(object->n_ops, >, 0); - object->n_ops--; - if (object->n_ops == 0) - fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); - - spin_unlock(&object->lock); - kfree(op); _leave(" [done]"); } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index de33b3fccca6..483bbc613bf0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -213,7 +213,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return -ENOMEM; } - fscache_operation_init(op, fscache_attr_changed_op, NULL); + fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL); op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE) | (1 << FSCACHE_OP_UNUSE_COOKIE); @@ -239,7 +239,7 @@ nobufs_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_operation(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_attr_changed_nobufs); @@ -249,6 +249,17 @@ nobufs: EXPORT_SYMBOL(__fscache_attr_changed); /* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + atomic_set(&op->n_pages, 0); +} + +/* * release a retrieval op reference */ static void fscache_release_retrieval_op(struct fscache_operation *_op) @@ -258,11 +269,12 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); - ASSERTCMP(atomic_read(&op->n_pages), ==, 0); + ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED, + atomic_read(&op->n_pages), ==, 0); fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) - fscache_put_context(op->op.object->cookie, op->context); + fscache_put_context(op->cookie, op->context); _leave(""); } @@ -285,15 +297,24 @@ static struct fscache_retrieval *fscache_alloc_retrieval( return NULL; } - fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); + fscache_operation_init(&op->op, NULL, + fscache_do_cancel_retrieval, + fscache_release_retrieval_op); op->op.flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING) | (1UL << FSCACHE_OP_UNUSE_COOKIE); + op->cookie = cookie; op->mapping = mapping; op->end_io_func = end_io_func; op->context = context; op->start_time = jiffies; INIT_LIST_HEAD(&op->to_do); + + /* Pin the netfs read context in case we need to do the actual netfs + * read because we've encountered a cache read failure. + */ + if (context) + fscache_get_context(op->cookie, context); return op; } @@ -330,24 +351,12 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) } /* - * Handle cancellation of a pending retrieval op - */ -static void fscache_do_cancel_retrieval(struct fscache_operation *_op) -{ - struct fscache_retrieval *op = - container_of(_op, struct fscache_retrieval, op); - - atomic_set(&op->n_pages, 0); -} - -/* * wait for an object to become active (or dead) */ int fscache_wait_for_operation_activation(struct fscache_object *object, struct fscache_operation *op, atomic_t *stat_op_waits, - atomic_t *stat_object_dead, - void (*do_cancel)(struct fscache_operation *)) + atomic_t *stat_object_dead) { int ret; @@ -359,7 +368,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, TASK_INTERRUPTIBLE) != 0) { - ret = fscache_cancel_op(op, do_cancel); + ret = fscache_cancel_op(op, false); if (ret == 0) return -ERESTARTSYS; @@ -377,11 +386,13 @@ check_if_dead: _leave(" = -ENOBUFS [cancelled]"); return -ENOBUFS; } - if (unlikely(fscache_object_is_dead(object))) { - pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->state); - fscache_cancel_op(op, do_cancel); + if (unlikely(fscache_object_is_dying(object) || + fscache_cache_is_broken(object))) { + enum fscache_operation_state state = op->state; + fscache_cancel_op(op, true); if (stat_object_dead) fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [obj dead %d]", state); return -ENOBUFS; } return 0; @@ -453,17 +464,12 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_retrieval_ops); - /* pin the netfs read context in case we need to do the actual netfs - * read because we've encountered a cache read failure */ - fscache_get_context(object->cookie, op->context); - /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_retrievals_object_dead)); if (ret < 0) goto error; @@ -503,7 +509,7 @@ nobufs_unlock: spin_unlock(&cookie->lock); if (wake_cookie) __fscache_wake_unused_cookie(cookie); - kfree(op); + fscache_put_retrieval(op); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); _leave(" = -ENOBUFS"); @@ -584,17 +590,12 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, fscache_stat(&fscache_n_retrieval_ops); - /* pin the netfs read context in case we need to do the actual netfs - * read because we've encountered a cache read failure */ - fscache_get_context(object->cookie, op->context); - /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_retrievals_object_dead)); if (ret < 0) goto error; @@ -632,7 +633,7 @@ nobufs_unlock_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_retrieval(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: @@ -700,8 +701,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ret = fscache_wait_for_operation_activation( object, &op->op, __fscache_stat(&fscache_n_alloc_op_waits), - __fscache_stat(&fscache_n_allocs_object_dead), - fscache_do_cancel_retrieval); + __fscache_stat(&fscache_n_allocs_object_dead)); if (ret < 0) goto error; @@ -726,7 +726,7 @@ nobufs_unlock_dec: wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - kfree(op); + fscache_put_retrieval(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: @@ -944,7 +944,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, if (!op) goto nomem; - fscache_operation_init(&op->op, fscache_write_op, + fscache_operation_init(&op->op, fscache_write_op, NULL, fscache_release_write_op); op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING) | @@ -1016,7 +1016,7 @@ already_pending: spin_unlock(&object->lock); spin_unlock(&cookie->lock); radix_tree_preload_end(); - kfree(op); + fscache_put_operation(&op->op); fscache_stat(&fscache_n_stores_ok); _leave(" = 0"); return 0; @@ -1036,7 +1036,7 @@ nobufs_unlock_obj: nobufs: spin_unlock(&cookie->lock); radix_tree_preload_end(); - kfree(op); + fscache_put_operation(&op->op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_stores_nobufs); @@ -1044,7 +1044,7 @@ nobufs: return -ENOBUFS; nomem_free: - kfree(op); + fscache_put_operation(&op->op); nomem: fscache_stat(&fscache_n_stores_oom); _leave(" = -ENOMEM"); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 40d13c70ef51..7cfa0aacdf6d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -23,6 +23,7 @@ atomic_t fscache_n_op_run; atomic_t fscache_n_op_enqueue; atomic_t fscache_n_op_requeue; atomic_t fscache_n_op_deferred_release; +atomic_t fscache_n_op_initialised; atomic_t fscache_n_op_release; atomic_t fscache_n_op_gc; atomic_t fscache_n_op_cancelled; @@ -130,6 +131,11 @@ atomic_t fscache_n_cop_write_page; atomic_t fscache_n_cop_uncache_page; atomic_t fscache_n_cop_dissociate_pages; +atomic_t fscache_n_cache_no_space_reject; +atomic_t fscache_n_cache_stale_objects; +atomic_t fscache_n_cache_retired_objects; +atomic_t fscache_n_cache_culled_objects; + /* * display the general statistics */ @@ -246,7 +252,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_op_enqueue), atomic_read(&fscache_n_op_cancelled), atomic_read(&fscache_n_op_rejected)); - seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", + seq_printf(m, "Ops : ini=%u dfr=%u rel=%u gc=%u\n", + atomic_read(&fscache_n_op_initialised), atomic_read(&fscache_n_op_deferred_release), atomic_read(&fscache_n_op_release), atomic_read(&fscache_n_op_gc)); @@ -271,6 +278,11 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_write_page), atomic_read(&fscache_n_cop_uncache_page), atomic_read(&fscache_n_cop_dissociate_pages)); + seq_printf(m, "CacheEv: nsp=%d stl=%d rtr=%d cul=%d\n", + atomic_read(&fscache_n_cache_no_space_reject), + atomic_read(&fscache_n_cache_stale_objects), + atomic_read(&fscache_n_cache_retired_objects), + atomic_read(&fscache_n_cache_culled_objects)); return 0; } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e5bbf748b698..eae2c11268bc 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -489,6 +489,7 @@ static void cuse_fc_release(struct fuse_conn *fc) */ static int cuse_channel_open(struct inode *inode, struct file *file) { + struct fuse_dev *fud; struct cuse_conn *cc; int rc; @@ -499,17 +500,22 @@ static int cuse_channel_open(struct inode *inode, struct file *file) fuse_conn_init(&cc->fc); + fud = fuse_dev_alloc(&cc->fc); + if (!fud) { + kfree(cc); + return -ENOMEM; + } + INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; - cc->fc.connected = 1; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { - fuse_conn_put(&cc->fc); + fuse_dev_free(fud); return rc; } - file->private_data = &cc->fc; /* channel owns base reference to cc */ + file->private_data = fud; return 0; } @@ -527,7 +533,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file) */ static int cuse_channel_release(struct inode *inode, struct file *file) { - struct cuse_conn *cc = fc_to_cc(file->private_data); + struct fuse_dev *fud = file->private_data; + struct cuse_conn *cc = fc_to_cc(fud->fc); int rc; /* remove from the conntbl, no more access from this point on */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c8b68ab2e574..80cc1b35d460 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -25,13 +25,13 @@ MODULE_ALIAS("devname:fuse"); static struct kmem_cache *fuse_req_cachep; -static struct fuse_conn *fuse_get_conn(struct file *file) +static struct fuse_dev *fuse_get_dev(struct file *file) { /* * Lockless access is OK, because file->private data is set * once during mount and is valid until the file is released. */ - return file->private_data; + return ACCESS_ONCE(file->private_data); } static void fuse_request_init(struct fuse_req *req, struct page **pages, @@ -48,6 +48,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, req->pages = pages; req->page_descs = page_descs; req->max_pages = npages; + __set_bit(FR_PENDING, &req->flags); } static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags) @@ -168,6 +169,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, if (!fc->connected) goto out; + err = -ECONNREFUSED; + if (fc->conn_error) + goto out; + req = fuse_request_alloc(npages); err = -ENOMEM; if (!req) { @@ -177,8 +182,10 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, } fuse_req_init_context(req); - req->waiting = 1; - req->background = for_background; + __set_bit(FR_WAITING, &req->flags); + if (for_background) + __set_bit(FR_BACKGROUND, &req->flags); + return req; out: @@ -268,15 +275,15 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, req = get_reserved_req(fc, file); fuse_req_init_context(req); - req->waiting = 1; - req->background = 0; + __set_bit(FR_WAITING, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); return req; } void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { if (atomic_dec_and_test(&req->count)) { - if (unlikely(req->background)) { + if (test_bit(FR_BACKGROUND, &req->flags)) { /* * We get here in the unlikely case that a background * request was allocated but not sent @@ -287,8 +294,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } - if (req->waiting) + if (test_bit(FR_WAITING, &req->flags)) { + __clear_bit(FR_WAITING, &req->flags); atomic_dec(&fc->num_waiting); + } if (req->stolen_file) put_reserved_req(fc, req); @@ -309,46 +318,38 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } -static u64 fuse_get_unique(struct fuse_conn *fc) +static u64 fuse_get_unique(struct fuse_iqueue *fiq) { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - - return fc->reqctr; + return ++fiq->reqctr; } -static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) { req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); - list_add_tail(&req->list, &fc->pending); - req->state = FUSE_REQ_PENDING; - if (!req->waiting) { - req->waiting = 1; - atomic_inc(&fc->num_waiting); - } - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + list_add_tail(&req->list, &fiq->pending); + wake_up_locked(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, u64 nodeid, u64 nlookup) { + struct fuse_iqueue *fiq = &fc->iq; + forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; - spin_lock(&fc->lock); - if (fc->connected) { - fc->forget_list_tail->next = forget; - fc->forget_list_tail = forget; - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_lock(&fiq->waitq.lock); + if (fiq->connected) { + fiq->forget_list_tail->next = forget; + fiq->forget_list_tail = forget; + wake_up_locked(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } else { kfree(forget); } - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); } static void flush_bg_queue(struct fuse_conn *fc) @@ -356,12 +357,15 @@ static void flush_bg_queue(struct fuse_conn *fc) while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; + struct fuse_iqueue *fiq = &fc->iq; req = list_entry(fc->bg_queue.next, struct fuse_req, list); list_del(&req->list); fc->active_background++; - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + spin_lock(&fiq->waitq.lock); + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); + spin_unlock(&fiq->waitq.lock); } } @@ -372,20 +376,22 @@ static void flush_bg_queue(struct fuse_conn *fc) * was closed. The requester thread is woken up (if still waiting), * the 'end' callback is called if given, else the reference to the * request is released - * - * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) { - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->end = NULL; - list_del(&req->list); - list_del(&req->intr_entry); - req->state = FUSE_REQ_FINISHED; - if (req->background) { - req->background = 0; + struct fuse_iqueue *fiq = &fc->iq; + + if (test_and_set_bit(FR_FINISHED, &req->flags)) + return; + spin_lock(&fiq->waitq.lock); + list_del_init(&req->intr_entry); + spin_unlock(&fiq->waitq.lock); + WARN_ON(test_bit(FR_PENDING, &req->flags)); + WARN_ON(test_bit(FR_SENT, &req->flags)); + if (test_bit(FR_BACKGROUND, &req->flags)) { + spin_lock(&fc->lock); + clear_bit(FR_BACKGROUND, &req->flags); if (fc->num_background == fc->max_background) fc->blocked = 0; @@ -401,122 +407,105 @@ __releases(fc->lock) fc->num_background--; fc->active_background--; flush_bg_queue(fc); + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); wake_up(&req->waitq); - if (end) - end(fc, req); + if (req->end) + req->end(fc, req); fuse_put_request(fc, req); } -static void wait_answer_interruptible(struct fuse_conn *fc, - struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) -{ - if (signal_pending(current)) - return; - - spin_unlock(&fc->lock); - wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); -} - -static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) +static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { - list_add_tail(&req->intr_entry, &fc->interrupts); - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_lock(&fiq->waitq.lock); + if (list_empty(&req->intr_entry)) { + list_add_tail(&req->intr_entry, &fiq->interrupts); + wake_up_locked(&fiq->waitq); + } + spin_unlock(&fiq->waitq.lock); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -__releases(fc->lock) -__acquires(fc->lock) { + struct fuse_iqueue *fiq = &fc->iq; + int err; + if (!fc->no_interrupt) { /* Any signal may interrupt this */ - wait_answer_interruptible(fc, req); - - if (req->aborted) - goto aborted; - if (req->state == FUSE_REQ_FINISHED) + err = wait_event_interruptible(req->waitq, + test_bit(FR_FINISHED, &req->flags)); + if (!err) return; - req->interrupted = 1; - if (req->state == FUSE_REQ_SENT) - queue_interrupt(fc, req); + set_bit(FR_INTERRUPTED, &req->flags); + /* matches barrier in fuse_dev_do_read() */ + smp_mb__after_atomic(); + if (test_bit(FR_SENT, &req->flags)) + queue_interrupt(fiq, req); } - if (!req->force) { + if (!test_bit(FR_FORCE, &req->flags)) { sigset_t oldset; /* Only fatal signals may interrupt this */ block_sigs(&oldset); - wait_answer_interruptible(fc, req); + err = wait_event_interruptible(req->waitq, + test_bit(FR_FINISHED, &req->flags)); restore_sigs(&oldset); - if (req->aborted) - goto aborted; - if (req->state == FUSE_REQ_FINISHED) + if (!err) return; + spin_lock(&fiq->waitq.lock); /* Request is not yet in userspace, bail out */ - if (req->state == FUSE_REQ_PENDING) { + if (test_bit(FR_PENDING, &req->flags)) { list_del(&req->list); + spin_unlock(&fiq->waitq.lock); __fuse_put_request(req); req->out.h.error = -EINTR; return; } + spin_unlock(&fiq->waitq.lock); } /* * Either request is already in userspace, or it was forced. * Wait it out. */ - spin_unlock(&fc->lock); - wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); - spin_lock(&fc->lock); - - if (!req->aborted) - return; - - aborted: - BUG_ON(req->state != FUSE_REQ_FINISHED); - if (req->locked) { - /* This is uninterruptible sleep, because data is - being copied to/from the buffers of req. During - locked state, there mustn't be any filesystem - operation (e.g. page fault), since that could lead - to deadlock */ - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - spin_lock(&fc->lock); - } + wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags)); } static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - BUG_ON(req->background); - spin_lock(&fc->lock); - if (!fc->connected) + struct fuse_iqueue *fiq = &fc->iq; + + BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); + spin_lock(&fiq->waitq.lock); + if (!fiq->connected) { + spin_unlock(&fiq->waitq.lock); req->out.h.error = -ENOTCONN; - else if (fc->conn_error) - req->out.h.error = -ECONNREFUSED; - else { - req->in.h.unique = fuse_get_unique(fc); - queue_request(fc, req); + } else { + req->in.h.unique = fuse_get_unique(fiq); + queue_request(fiq, req); /* acquire extra reference, since request is still needed after request_end() */ __fuse_get_request(req); + spin_unlock(&fiq->waitq.lock); request_wait_answer(fc, req); + /* Pairs with smp_wmb() in request_end() */ + smp_rmb(); } - spin_unlock(&fc->lock); } void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) { - req->isreply = 1; + __set_bit(FR_ISREPLY, &req->flags); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); + atomic_inc(&fc->num_waiting); + } __fuse_request_send(fc, req); } EXPORT_SYMBOL_GPL(fuse_request_send); @@ -586,10 +575,20 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) return ret; } -static void fuse_request_send_nowait_locked(struct fuse_conn *fc, - struct fuse_req *req) +/* + * Called under fc->lock + * + * fc->connected must have been checked previously + */ +void fuse_request_send_background_locked(struct fuse_conn *fc, + struct fuse_req *req) { - BUG_ON(!req->background); + BUG_ON(!test_bit(FR_BACKGROUND, &req->flags)); + if (!test_bit(FR_WAITING, &req->flags)) { + __set_bit(FR_WAITING, &req->flags); + atomic_inc(&fc->num_waiting); + } + __set_bit(FR_ISREPLY, &req->flags); fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; @@ -602,54 +601,40 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, flush_bg_queue(fc); } -static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) +void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) { + BUG_ON(!req->end); spin_lock(&fc->lock); if (fc->connected) { - fuse_request_send_nowait_locked(fc, req); + fuse_request_send_background_locked(fc, req); spin_unlock(&fc->lock); } else { + spin_unlock(&fc->lock); req->out.h.error = -ENOTCONN; - request_end(fc, req); + req->end(fc, req); + fuse_put_request(fc, req); } } - -void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait(fc, req); -} EXPORT_SYMBOL_GPL(fuse_request_send_background); static int fuse_request_send_notify_reply(struct fuse_conn *fc, struct fuse_req *req, u64 unique) { int err = -ENODEV; + struct fuse_iqueue *fiq = &fc->iq; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); req->in.h.unique = unique; - spin_lock(&fc->lock); - if (fc->connected) { - queue_request(fc, req); + spin_lock(&fiq->waitq.lock); + if (fiq->connected) { + queue_request(fiq, req); err = 0; } - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return err; } -/* - * Called under fc->lock - * - * fc->connected must have been checked previously - */ -void fuse_request_send_background_locked(struct fuse_conn *fc, - struct fuse_req *req) -{ - req->isreply = 1; - fuse_request_send_nowait_locked(fc, req); -} - void fuse_force_forget(struct file *file, u64 nodeid) { struct inode *inode = file_inode(file); @@ -665,7 +650,7 @@ void fuse_force_forget(struct file *file, u64 nodeid) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->isreply = 0; + __clear_bit(FR_ISREPLY, &req->flags); __fuse_request_send(fc, req); /* ignore errors */ fuse_put_request(fc, req); @@ -676,38 +661,39 @@ void fuse_force_forget(struct file *file, u64 nodeid) * anything that could cause a page-fault. If the request was already * aborted bail out. */ -static int lock_request(struct fuse_conn *fc, struct fuse_req *req) +static int lock_request(struct fuse_req *req) { int err = 0; if (req) { - spin_lock(&fc->lock); - if (req->aborted) + spin_lock(&req->waitq.lock); + if (test_bit(FR_ABORTED, &req->flags)) err = -ENOENT; else - req->locked = 1; - spin_unlock(&fc->lock); + set_bit(FR_LOCKED, &req->flags); + spin_unlock(&req->waitq.lock); } return err; } /* - * Unlock request. If it was aborted during being locked, the - * requester thread is currently waiting for it to be unlocked, so - * wake it up. + * Unlock request. If it was aborted while locked, caller is responsible + * for unlocking and ending the request. */ -static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) +static int unlock_request(struct fuse_req *req) { + int err = 0; if (req) { - spin_lock(&fc->lock); - req->locked = 0; - if (req->aborted) - wake_up(&req->waitq); - spin_unlock(&fc->lock); + spin_lock(&req->waitq.lock); + if (test_bit(FR_ABORTED, &req->flags)) + err = -ENOENT; + else + clear_bit(FR_LOCKED, &req->flags); + spin_unlock(&req->waitq.lock); } + return err; } struct fuse_copy_state { - struct fuse_conn *fc; int write; struct fuse_req *req; struct iov_iter *iter; @@ -721,13 +707,10 @@ struct fuse_copy_state { unsigned move_pages:1; }; -static void fuse_copy_init(struct fuse_copy_state *cs, - struct fuse_conn *fc, - int write, +static void fuse_copy_init(struct fuse_copy_state *cs, int write, struct iov_iter *iter) { memset(cs, 0, sizeof(*cs)); - cs->fc = fc; cs->write = write; cs->iter = iter; } @@ -760,7 +743,10 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) struct page *page; int err; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); if (cs->pipebufs) { struct pipe_buffer *buf = cs->pipebufs; @@ -809,7 +795,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) iov_iter_advance(cs->iter, err); } - return lock_request(cs->fc, cs->req); + return lock_request(cs->req); } /* Do as much copy to/from userspace buffer as we can */ @@ -860,7 +846,10 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); err = buf->ops->confirm(cs->pipe, buf); @@ -914,12 +903,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) lru_cache_add_file(newpage); err = 0; - spin_lock(&cs->fc->lock); - if (cs->req->aborted) + spin_lock(&cs->req->waitq.lock); + if (test_bit(FR_ABORTED, &cs->req->flags)) err = -ENOENT; else *pagep = newpage; - spin_unlock(&cs->fc->lock); + spin_unlock(&cs->req->waitq.lock); if (err) { unlock_page(newpage); @@ -939,7 +928,7 @@ out_fallback: cs->pg = buf->page; cs->offset = buf->offset; - err = lock_request(cs->fc, cs->req); + err = lock_request(cs->req); if (err) return err; @@ -950,11 +939,15 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, unsigned offset, unsigned count) { struct pipe_buffer *buf; + int err; if (cs->nr_segs == cs->pipe->buffers) return -EIO; - unlock_request(cs->fc, cs->req); + err = unlock_request(cs->req); + if (err) + return err; + fuse_copy_finish(cs); buf = cs->pipebufs; @@ -1065,36 +1058,15 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } -static int forget_pending(struct fuse_conn *fc) +static int forget_pending(struct fuse_iqueue *fiq) { - return fc->forget_list_head.next != NULL; + return fiq->forget_list_head.next != NULL; } -static int request_pending(struct fuse_conn *fc) +static int request_pending(struct fuse_iqueue *fiq) { - return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) || - forget_pending(fc); -} - -/* Wait until a request is available on the pending list */ -static void request_wait(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->connected && !request_pending(fc)) { - set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) - break; - - spin_unlock(&fc->lock); - schedule(); - spin_lock(&fc->lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&fc->waitq, &wait); + return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) || + forget_pending(fiq); } /* @@ -1103,11 +1075,12 @@ __acquires(fc->lock) * Unlike other requests this is assembled on demand, without a need * to allocate a separate fuse_req structure. * - * Called with fc->lock held, releases it + * Called with fiq->waitq.lock held, releases it */ -static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_interrupt(struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(fc->lock) +__releases(fiq->waitq.lock) { struct fuse_in_header ih; struct fuse_interrupt_in arg; @@ -1115,7 +1088,7 @@ __releases(fc->lock) int err; list_del_init(&req->intr_entry); - req->intr_unique = fuse_get_unique(fc); + req->intr_unique = fuse_get_unique(fiq); memset(&ih, 0, sizeof(ih)); memset(&arg, 0, sizeof(arg)); ih.len = reqsize; @@ -1123,7 +1096,7 @@ __releases(fc->lock) ih.unique = req->intr_unique; arg.unique = req->in.h.unique; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); if (nbytes < reqsize) return -EINVAL; @@ -1135,21 +1108,21 @@ __releases(fc->lock) return err ? err : reqsize; } -static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, +static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, unsigned max, unsigned *countp) { - struct fuse_forget_link *head = fc->forget_list_head.next; + struct fuse_forget_link *head = fiq->forget_list_head.next; struct fuse_forget_link **newhead = &head; unsigned count; for (count = 0; *newhead != NULL && count < max; count++) newhead = &(*newhead)->next; - fc->forget_list_head.next = *newhead; + fiq->forget_list_head.next = *newhead; *newhead = NULL; - if (fc->forget_list_head.next == NULL) - fc->forget_list_tail = &fc->forget_list_head; + if (fiq->forget_list_head.next == NULL) + fiq->forget_list_tail = &fiq->forget_list_head; if (countp != NULL) *countp = count; @@ -1157,24 +1130,24 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc, return head; } -static int fuse_read_single_forget(struct fuse_conn *fc, +static int fuse_read_single_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; - struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL); + struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); struct fuse_forget_in arg = { .nlookup = forget->forget_one.nlookup, }; struct fuse_in_header ih = { .opcode = FUSE_FORGET, .nodeid = forget->forget_one.nodeid, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); kfree(forget); if (nbytes < ih.len) return -EINVAL; @@ -1190,9 +1163,9 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_batch_forget(struct fuse_conn *fc, +static int fuse_read_batch_forget(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { int err; unsigned max_forgets; @@ -1201,18 +1174,18 @@ __releases(fc->lock) struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_in_header ih = { .opcode = FUSE_BATCH_FORGET, - .unique = fuse_get_unique(fc), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; if (nbytes < ih.len) { - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return -EINVAL; } max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); - head = dequeue_forget(fc, max_forgets, &count); - spin_unlock(&fc->lock); + head = dequeue_forget(fiq, max_forgets, &count); + spin_unlock(&fiq->waitq.lock); arg.count = count; ih.len += count * sizeof(struct fuse_forget_one); @@ -1239,14 +1212,15 @@ __releases(fc->lock) return ih.len; } -static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs, +static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, + struct fuse_copy_state *cs, size_t nbytes) -__releases(fc->lock) +__releases(fiq->waitq.lock) { - if (fc->minor < 16 || fc->forget_list_head.next->next == NULL) - return fuse_read_single_forget(fc, cs, nbytes); + if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fiq, cs, nbytes); else - return fuse_read_batch_forget(fc, cs, nbytes); + return fuse_read_batch_forget(fiq, cs, nbytes); } /* @@ -1258,46 +1232,51 @@ __releases(fc->lock) * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ -static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, +static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { - int err; + ssize_t err; + struct fuse_conn *fc = fud->fc; + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_in *in; unsigned reqsize; restart: - spin_lock(&fc->lock); + spin_lock(&fiq->waitq.lock); err = -EAGAIN; - if ((file->f_flags & O_NONBLOCK) && fc->connected && - !request_pending(fc)) + if ((file->f_flags & O_NONBLOCK) && fiq->connected && + !request_pending(fiq)) goto err_unlock; - request_wait(fc); - err = -ENODEV; - if (!fc->connected) + err = wait_event_interruptible_exclusive_locked(fiq->waitq, + !fiq->connected || request_pending(fiq)); + if (err) goto err_unlock; - err = -ERESTARTSYS; - if (!request_pending(fc)) + + err = -ENODEV; + if (!fiq->connected) goto err_unlock; - if (!list_empty(&fc->interrupts)) { - req = list_entry(fc->interrupts.next, struct fuse_req, + if (!list_empty(&fiq->interrupts)) { + req = list_entry(fiq->interrupts.next, struct fuse_req, intr_entry); - return fuse_read_interrupt(fc, cs, nbytes, req); + return fuse_read_interrupt(fiq, cs, nbytes, req); } - if (forget_pending(fc)) { - if (list_empty(&fc->pending) || fc->forget_batch-- > 0) - return fuse_read_forget(fc, cs, nbytes); + if (forget_pending(fiq)) { + if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) + return fuse_read_forget(fc, fiq, cs, nbytes); - if (fc->forget_batch <= -8) - fc->forget_batch = 16; + if (fiq->forget_batch <= -8) + fiq->forget_batch = 16; } - req = list_entry(fc->pending.next, struct fuse_req, list); - req->state = FUSE_REQ_READING; - list_move(&req->list, &fc->io); + req = list_entry(fiq->pending.next, struct fuse_req, list); + clear_bit(FR_PENDING, &req->flags); + list_del_init(&req->list); + spin_unlock(&fiq->waitq.lock); in = &req->in; reqsize = in->h.len; @@ -1310,37 +1289,48 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, request_end(fc, req); goto restart; } - spin_unlock(&fc->lock); + spin_lock(&fpq->lock); + list_add(&req->list, &fpq->io); + spin_unlock(&fpq->lock); cs->req = req; err = fuse_copy_one(cs, &in->h, sizeof(in->h)); if (!err) err = fuse_copy_args(cs, in->numargs, in->argpages, (struct fuse_arg *) in->args, 0); fuse_copy_finish(cs); - spin_lock(&fc->lock); - req->locked = 0; - if (req->aborted) { - request_end(fc, req); - return -ENODEV; + spin_lock(&fpq->lock); + clear_bit(FR_LOCKED, &req->flags); + if (!fpq->connected) { + err = -ENODEV; + goto out_end; } if (err) { req->out.h.error = -EIO; - request_end(fc, req); - return err; + goto out_end; } - if (!req->isreply) - request_end(fc, req); - else { - req->state = FUSE_REQ_SENT; - list_move_tail(&req->list, &fc->processing); - if (req->interrupted) - queue_interrupt(fc, req); - spin_unlock(&fc->lock); + if (!test_bit(FR_ISREPLY, &req->flags)) { + err = reqsize; + goto out_end; } + list_move_tail(&req->list, &fpq->processing); + spin_unlock(&fpq->lock); + set_bit(FR_SENT, &req->flags); + /* matches barrier in request_wait_answer() */ + smp_mb__after_atomic(); + if (test_bit(FR_INTERRUPTED, &req->flags)) + queue_interrupt(fiq, req); + return reqsize; +out_end: + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); + spin_unlock(&fpq->lock); + request_end(fc, req); + return err; + err_unlock: - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return err; } @@ -1359,16 +1349,17 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to) { struct fuse_copy_state cs; struct file *file = iocb->ki_filp; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; if (!iter_is_iovec(to)) return -EINVAL; - fuse_copy_init(&cs, fc, 1, to); + fuse_copy_init(&cs, 1, to); - return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to)); + return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to)); } static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, @@ -1380,18 +1371,19 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, int do_wakeup = 0; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(in); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(in); + + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; - fuse_copy_init(&cs, fc, 1, NULL); + fuse_copy_init(&cs, 1, NULL); cs.pipebufs = bufs; cs.pipe = pipe; - ret = fuse_dev_do_read(fc, in, &cs, len); + ret = fuse_dev_do_read(fud, in, &cs, len); if (ret < 0) goto out; @@ -1830,11 +1822,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, } /* Look up request on processing list by unique ID */ -static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) +static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique) { struct fuse_req *req; - list_for_each_entry(req, &fc->processing, list) { + list_for_each_entry(req, &fpq->processing, list) { if (req->in.h.unique == unique || req->intr_unique == unique) return req; } @@ -1871,10 +1863,12 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, * it from the list and copy the rest of the buffer to the request. * The request is finished by calling request_end() */ -static ssize_t fuse_dev_do_write(struct fuse_conn *fc, +static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) { int err; + struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -1902,63 +1896,60 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, if (oh.error <= -1000 || oh.error > 0) goto err_finish; - spin_lock(&fc->lock); + spin_lock(&fpq->lock); err = -ENOENT; - if (!fc->connected) - goto err_unlock; + if (!fpq->connected) + goto err_unlock_pq; - req = request_find(fc, oh.unique); + req = request_find(fpq, oh.unique); if (!req) - goto err_unlock; + goto err_unlock_pq; - if (req->aborted) { - spin_unlock(&fc->lock); - fuse_copy_finish(cs); - spin_lock(&fc->lock); - request_end(fc, req); - return -ENOENT; - } /* Is it an interrupt reply? */ if (req->intr_unique == oh.unique) { + spin_unlock(&fpq->lock); + err = -EINVAL; if (nbytes != sizeof(struct fuse_out_header)) - goto err_unlock; + goto err_finish; if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) - queue_interrupt(fc, req); + queue_interrupt(&fc->iq, req); - spin_unlock(&fc->lock); fuse_copy_finish(cs); return nbytes; } - req->state = FUSE_REQ_WRITING; - list_move(&req->list, &fc->io); + clear_bit(FR_SENT, &req->flags); + list_move(&req->list, &fpq->io); req->out.h = oh; - req->locked = 1; + set_bit(FR_LOCKED, &req->flags); + spin_unlock(&fpq->lock); cs->req = req; if (!req->out.page_replace) cs->move_pages = 0; - spin_unlock(&fc->lock); err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); - spin_lock(&fc->lock); - req->locked = 0; - if (!err) { - if (req->aborted) - err = -ENOENT; - } else if (!req->aborted) + spin_lock(&fpq->lock); + clear_bit(FR_LOCKED, &req->flags); + if (!fpq->connected) + err = -ENOENT; + else if (err) req->out.h.error = -EIO; + if (!test_bit(FR_PRIVATE, &req->flags)) + list_del_init(&req->list); + spin_unlock(&fpq->lock); + request_end(fc, req); return err ? err : nbytes; - err_unlock: - spin_unlock(&fc->lock); + err_unlock_pq: + spin_unlock(&fpq->lock); err_finish: fuse_copy_finish(cs); return err; @@ -1967,16 +1958,17 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc, static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) { struct fuse_copy_state cs; - struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp); + + if (!fud) return -EPERM; if (!iter_is_iovec(from)) return -EINVAL; - fuse_copy_init(&cs, fc, 0, from); + fuse_copy_init(&cs, 0, from); - return fuse_dev_do_write(fc, &cs, iov_iter_count(from)); + return fuse_dev_do_write(fud, &cs, iov_iter_count(from)); } static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, @@ -1987,12 +1979,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, unsigned idx; struct pipe_buffer *bufs; struct fuse_copy_state cs; - struct fuse_conn *fc; + struct fuse_dev *fud; size_t rem; ssize_t ret; - fc = fuse_get_conn(out); - if (!fc) + fud = fuse_get_dev(out); + if (!fud) return -EPERM; bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL); @@ -2039,7 +2031,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, } pipe_unlock(pipe); - fuse_copy_init(&cs, fc, 0, NULL); + fuse_copy_init(&cs, 0, NULL); cs.pipebufs = bufs; cs.nr_segs = nbuf; cs.pipe = pipe; @@ -2047,7 +2039,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, if (flags & SPLICE_F_MOVE) cs.move_pages = 1; - ret = fuse_dev_do_write(fc, &cs, len); + ret = fuse_dev_do_write(fud, &cs, len); for (idx = 0; idx < nbuf; idx++) { struct pipe_buffer *buf = &bufs[idx]; @@ -2061,18 +2053,21 @@ out: static unsigned fuse_dev_poll(struct file *file, poll_table *wait) { unsigned mask = POLLOUT | POLLWRNORM; - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_iqueue *fiq; + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return POLLERR; - poll_wait(file, &fc->waitq, wait); + fiq = &fud->fc->iq; + poll_wait(file, &fiq->waitq, wait); - spin_lock(&fc->lock); - if (!fc->connected) + spin_lock(&fiq->waitq.lock); + if (!fiq->connected) mask = POLLERR; - else if (request_pending(fc)) + else if (request_pending(fiq)) mask |= POLLIN | POLLRDNORM; - spin_unlock(&fc->lock); + spin_unlock(&fiq->waitq.lock); return mask; } @@ -2083,67 +2078,18 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) -__releases(fc->lock) -__acquires(fc->lock) { while (!list_empty(head)) { struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; - request_end(fc, req); - spin_lock(&fc->lock); - } -} - -/* - * Abort requests under I/O - * - * The requests are set to aborted and finished, and the request - * waiter is woken up. This will make request_wait_answer() wait - * until the request is unlocked and then return. - * - * If the request is asynchronous, then the end function needs to be - * called after waiting for the request to be unlocked (if it was - * locked). - */ -static void end_io_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - while (!list_empty(&fc->io)) { - struct fuse_req *req = - list_entry(fc->io.next, struct fuse_req, list); - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - - req->aborted = 1; - req->out.h.error = -ECONNABORTED; - req->state = FUSE_REQ_FINISHED; + clear_bit(FR_PENDING, &req->flags); + clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); - wake_up(&req->waitq); - if (end) { - req->end = NULL; - __fuse_get_request(req); - spin_unlock(&fc->lock); - wait_event(req->waitq, !req->locked); - end(fc, req); - fuse_put_request(fc, req); - spin_lock(&fc->lock); - } + request_end(fc, req); } } -static void end_queued_requests(struct fuse_conn *fc) -__releases(fc->lock) -__acquires(fc->lock) -{ - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); - while (forget_pending(fc)) - kfree(dequeue_forget(fc, 1, NULL)); -} - static void end_polls(struct fuse_conn *fc) { struct rb_node *p; @@ -2162,67 +2108,156 @@ static void end_polls(struct fuse_conn *fc) /* * Abort all requests. * - * Emergency exit in case of a malicious or accidental deadlock, or - * just a hung filesystem. + * Emergency exit in case of a malicious or accidental deadlock, or just a hung + * filesystem. * - * The same effect is usually achievable through killing the - * filesystem daemon and all users of the filesystem. The exception - * is the combination of an asynchronous request and the tricky - * deadlock (see Documentation/filesystems/fuse.txt). + * The same effect is usually achievable through killing the filesystem daemon + * and all users of the filesystem. The exception is the combination of an + * asynchronous request and the tricky deadlock (see + * Documentation/filesystems/fuse.txt). * - * During the aborting, progression of requests from the pending and - * processing lists onto the io list, and progression of new requests - * onto the pending list is prevented by req->connected being false. - * - * Progression of requests under I/O to the processing list is - * prevented by the req->aborted flag being true for these requests. - * For this reason requests on the io list must be aborted first. + * Aborting requests under I/O goes as follows: 1: Separate out unlocked + * requests, they should be finished off immediately. Locked requests will be + * finished after unlock; see unlock_request(). 2: Finish off the unlocked + * requests. It is possible that some request will finish before we can. This + * is OK, the request will in that case be removed from the list before we touch + * it. */ void fuse_abort_conn(struct fuse_conn *fc) { + struct fuse_iqueue *fiq = &fc->iq; + spin_lock(&fc->lock); if (fc->connected) { + struct fuse_dev *fud; + struct fuse_req *req, *next; + LIST_HEAD(to_end1); + LIST_HEAD(to_end2); + fc->connected = 0; fc->blocked = 0; fuse_set_initialized(fc); - end_io_requests(fc); - end_queued_requests(fc); + list_for_each_entry(fud, &fc->devices, entry) { + struct fuse_pqueue *fpq = &fud->pq; + + spin_lock(&fpq->lock); + fpq->connected = 0; + list_for_each_entry_safe(req, next, &fpq->io, list) { + req->out.h.error = -ECONNABORTED; + spin_lock(&req->waitq.lock); + set_bit(FR_ABORTED, &req->flags); + if (!test_bit(FR_LOCKED, &req->flags)) { + set_bit(FR_PRIVATE, &req->flags); + list_move(&req->list, &to_end1); + } + spin_unlock(&req->waitq.lock); + } + list_splice_init(&fpq->processing, &to_end2); + spin_unlock(&fpq->lock); + } + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + + spin_lock(&fiq->waitq.lock); + fiq->connected = 0; + list_splice_init(&fiq->pending, &to_end2); + while (forget_pending(fiq)) + kfree(dequeue_forget(fiq, 1, NULL)); + wake_up_all_locked(&fiq->waitq); + spin_unlock(&fiq->waitq.lock); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); end_polls(fc); - wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + spin_unlock(&fc->lock); + + while (!list_empty(&to_end1)) { + req = list_first_entry(&to_end1, struct fuse_req, list); + __fuse_get_request(req); + list_del_init(&req->list); + request_end(fc, req); + } + end_requests(fc, &to_end2); + } else { + spin_unlock(&fc->lock); } - spin_unlock(&fc->lock); } EXPORT_SYMBOL_GPL(fuse_abort_conn); int fuse_dev_release(struct inode *inode, struct file *file) { - struct fuse_conn *fc = fuse_get_conn(file); - if (fc) { - spin_lock(&fc->lock); - fc->connected = 0; - fc->blocked = 0; - fuse_set_initialized(fc); - end_queued_requests(fc); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); - spin_unlock(&fc->lock); - fuse_conn_put(fc); - } + struct fuse_dev *fud = fuse_get_dev(file); + if (fud) { + struct fuse_conn *fc = fud->fc; + struct fuse_pqueue *fpq = &fud->pq; + + WARN_ON(!list_empty(&fpq->io)); + end_requests(fc, &fpq->processing); + /* Are we the last open device? */ + if (atomic_dec_and_test(&fc->dev_count)) { + WARN_ON(fc->iq.fasync != NULL); + fuse_abort_conn(fc); + } + fuse_dev_free(fud); + } return 0; } EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { - struct fuse_conn *fc = fuse_get_conn(file); - if (!fc) + struct fuse_dev *fud = fuse_get_dev(file); + + if (!fud) return -EPERM; /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fc->fasync); + return fasync_helper(fd, file, on, &fud->fc->iq.fasync); +} + +static int fuse_device_clone(struct fuse_conn *fc, struct file *new) +{ + struct fuse_dev *fud; + + if (new->private_data) + return -EINVAL; + + fud = fuse_dev_alloc(fc); + if (!fud) + return -ENOMEM; + + new->private_data = fud; + atomic_inc(&fc->dev_count); + + return 0; +} + +static long fuse_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int err = -ENOTTY; + + if (cmd == FUSE_DEV_IOC_CLONE) { + int oldfd; + + err = -EFAULT; + if (!get_user(oldfd, (__u32 __user *) arg)) { + struct file *old = fget(oldfd); + + err = -EINVAL; + if (old) { + struct fuse_dev *fud = fuse_get_dev(old); + + if (fud) { + mutex_lock(&fuse_mutex); + err = fuse_device_clone(fud->fc, file); + mutex_unlock(&fuse_mutex); + } + fput(old); + } + } + } + return err; } const struct file_operations fuse_dev_operations = { @@ -2236,6 +2271,8 @@ const struct file_operations fuse_dev_operations = { .poll = fuse_dev_poll, .release = fuse_dev_release, .fasync = fuse_dev_fasync, + .unlocked_ioctl = fuse_dev_ioctl, + .compat_ioctl = fuse_dev_ioctl, }; EXPORT_SYMBOL_GPL(fuse_dev_operations); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8c5e2fa68835..f523f2f04c19 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -96,17 +96,17 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) * Drop the release request when client does not * implement 'open' */ - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { - req->background = 0; + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else { req->end = fuse_release_end; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); fuse_request_send_background(ff->fc, req); } kfree(ff); @@ -299,8 +299,8 @@ void fuse_sync_release(struct fuse_file *ff, int flags) { WARN_ON(atomic_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - ff->reserved_req->background = 0; + __set_bit(FR_FORCE, &ff->reserved_req->flags); + __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags); fuse_request_send(ff->fc, ff->reserved_req); fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); @@ -426,7 +426,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; - req->force = 1; + __set_bit(FR_FORCE, &req->flags); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -1169,7 +1169,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err <= 0) goto out; - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; @@ -1611,7 +1611,8 @@ static int fuse_writepage_locked(struct page *page) if (!req) goto err; - req->background = 1; /* writeback always goes to bg_queue */ + /* writeback always goes to bg_queue */ + __set_bit(FR_BACKGROUND, &req->flags); tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!tmp_page) goto err_free; @@ -1742,8 +1743,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, } } - if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || - old_req->state == FUSE_REQ_PENDING)) { + if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) { struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); copy_highpage(old_req->pages[0], page); @@ -1830,7 +1830,7 @@ static int fuse_writepages_fill(struct page *page, req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->misc.write.next = NULL; req->in.argpages = 1; - req->background = 1; + __set_bit(FR_BACKGROUND, &req->flags); req->num_pages = 0; req->end = fuse_writepage_end; req->inode = inode; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7354dc142a50..405113101db8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -241,16 +241,6 @@ struct fuse_args { #define FUSE_ARGS(args) struct fuse_args args = {} -/** The request state */ -enum fuse_req_state { - FUSE_REQ_INIT = 0, - FUSE_REQ_PENDING, - FUSE_REQ_READING, - FUSE_REQ_SENT, - FUSE_REQ_WRITING, - FUSE_REQ_FINISHED -}; - /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { int async; @@ -267,7 +257,40 @@ struct fuse_io_priv { }; /** + * Request flags + * + * FR_ISREPLY: set if the request has reply + * FR_FORCE: force sending of the request even if interrupted + * FR_BACKGROUND: request is sent in the background + * FR_WAITING: request is counted as "waiting" + * FR_ABORTED: the request was aborted + * FR_INTERRUPTED: the request has been interrupted + * FR_LOCKED: data is being copied to/from the request + * FR_PENDING: request is not yet in userspace + * FR_SENT: request is in userspace, waiting for an answer + * FR_FINISHED: request is finished + * FR_PRIVATE: request is on private list + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, + FR_PRIVATE, +}; + +/** * A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fc->lock, tested under both) */ struct fuse_req { /** This can be on either pending processing or io lists in @@ -283,35 +306,8 @@ struct fuse_req { /** Unique ID for the interrupt request */ u64 intr_unique; - /* - * The following bitfields are either set once before the - * request is queued or setting/clearing them is protected by - * fuse_conn->lock - */ - - /** True if the request has reply */ - unsigned isreply:1; - - /** Force sending of the request even if interrupted */ - unsigned force:1; - - /** The request was aborted */ - unsigned aborted:1; - - /** Request is sent in the background */ - unsigned background:1; - - /** The request has been interrupted */ - unsigned interrupted:1; - - /** Data is being copied to/from the request */ - unsigned locked:1; - - /** Request is counted as "waiting" */ - unsigned waiting:1; - - /** State of the request */ - enum fuse_req_state state; + /* Request flags, updated with test/set/clear_bit() */ + unsigned long flags; /** The request input */ struct fuse_in in; @@ -380,6 +376,61 @@ struct fuse_req { struct file *stolen_file; }; +struct fuse_iqueue { + /** Connection established */ + unsigned connected; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; +}; + +struct fuse_pqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accessess to members of this structure */ + spinlock_t lock; + + /** The list of requests being processed */ + struct list_head processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + +/** + * Fuse device instance + */ +struct fuse_dev { + /** Fuse connection for this device */ + struct fuse_conn *fc; + + /** Processing queue */ + struct fuse_pqueue pq; + + /** list entry on fc->devices */ + struct list_head entry; +}; + /** * A Fuse connection. * @@ -394,6 +445,9 @@ struct fuse_conn { /** Refcount */ atomic_t count; + /** Number of fuse_dev's */ + atomic_t dev_count; + struct rcu_head rcu; /** The user id for this mount */ @@ -411,17 +465,8 @@ struct fuse_conn { /** Maximum write size */ unsigned max_write; - /** Readers of the connection are waiting on this */ - wait_queue_head_t waitq; - - /** The list of pending requests */ - struct list_head pending; - - /** The list of requests being processed */ - struct list_head processing; - - /** The list of requests under I/O */ - struct list_head io; + /** Input queue */ + struct fuse_iqueue iq; /** The next unique kernel file handle */ u64 khctr; @@ -444,16 +489,6 @@ struct fuse_conn { /** The list of background requests set aside for later queuing */ struct list_head bg_queue; - /** Pending interrupts */ - struct list_head interrupts; - - /** Queue of pending forgets */ - struct fuse_forget_link forget_list_head; - struct fuse_forget_link *forget_list_tail; - - /** Batching of FORGET requests (positive indicates FORGET batch) */ - int forget_batch; - /** Flag indicating that INIT reply has been received. Allocating * any fuse request will be suspended until the flag is set */ int initialized; @@ -469,9 +504,6 @@ struct fuse_conn { /** waitq for reserved requests */ wait_queue_head_t reserved_req_waitq; - /** The next unique request id */ - u64 reqctr; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -594,9 +626,6 @@ struct fuse_conn { /** number of dentries used in the above array */ int ctl_ndents; - /** O_ASYNC requests */ - struct fasync_struct *fasync; - /** Key for lock owner ID scrambling */ u32 scramble_key[4]; @@ -614,6 +643,9 @@ struct fuse_conn { /** Read/write semaphore to hold when accessing sb. */ struct rw_semaphore killsb; + + /** List of device instances belonging to this connection */ + struct list_head devices; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -826,6 +858,9 @@ void fuse_conn_init(struct fuse_conn *fc); */ void fuse_conn_put(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc); +void fuse_dev_free(struct fuse_dev *fud); + /** * Add connection to control filesystem */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 082ac1c97f39..2913db2a5b99 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -362,8 +362,8 @@ static void fuse_send_destroy(struct fuse_conn *fc) if (req && fc->conn_init) { fc->destroy_req = NULL; req->in.h.opcode = FUSE_DESTROY; - req->force = 1; - req->background = 0; + __set_bit(FR_FORCE, &req->flags); + __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(fc, req); fuse_put_request(fc, req); } @@ -567,30 +567,46 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void fuse_iqueue_init(struct fuse_iqueue *fiq) +{ + memset(fiq, 0, sizeof(struct fuse_iqueue)); + init_waitqueue_head(&fiq->waitq); + INIT_LIST_HEAD(&fiq->pending); + INIT_LIST_HEAD(&fiq->interrupts); + fiq->forget_list_tail = &fiq->forget_list_head; + fiq->connected = 1; +} + +static void fuse_pqueue_init(struct fuse_pqueue *fpq) +{ + memset(fpq, 0, sizeof(struct fuse_pqueue)); + spin_lock_init(&fpq->lock); + INIT_LIST_HEAD(&fpq->processing); + INIT_LIST_HEAD(&fpq->io); + fpq->connected = 1; +} + void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); - init_waitqueue_head(&fc->waitq); + atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); - INIT_LIST_HEAD(&fc->pending); - INIT_LIST_HEAD(&fc->processing); - INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->interrupts); + fuse_iqueue_init(&fc->iq); INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); - fc->forget_list_tail = &fc->forget_list_head; + INIT_LIST_HEAD(&fc->devices); atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->reqctr = 0; fc->blocked = 0; fc->initialized = 0; + fc->connected = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } @@ -930,6 +946,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { + WARN_ON(!list_empty(&fc->devices)); kfree_rcu(fc, rcu); } @@ -975,8 +992,42 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } +struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc) +{ + struct fuse_dev *fud; + + fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); + if (fud) { + fud->fc = fuse_conn_get(fc); + fuse_pqueue_init(&fud->pq); + + spin_lock(&fc->lock); + list_add_tail(&fud->entry, &fc->devices); + spin_unlock(&fc->lock); + } + + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc); + +void fuse_dev_free(struct fuse_dev *fud) +{ + struct fuse_conn *fc = fud->fc; + + if (fc) { + spin_lock(&fc->lock); + list_del(&fud->entry); + spin_unlock(&fc->lock); + + fuse_conn_put(fc); + } + kfree(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_free); + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { + struct fuse_dev *fud; struct fuse_conn *fc; struct inode *root; struct fuse_mount_data d; @@ -1026,12 +1077,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) goto err_fput; fuse_conn_init(fc); + fc->release = fuse_free_conn; + + fud = fuse_dev_alloc(fc); + if (!fud) + goto err_put_conn; fc->dev = sb->s_dev; fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) - goto err_put_conn; + goto err_dev_free; sb->s_bdi = &fc->bdi; @@ -1040,7 +1096,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->dont_mask = 1; sb->s_flags |= MS_POSIXACL; - fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; fc->group_id = d.group_id; @@ -1053,14 +1108,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) root = fuse_get_root_inode(sb, d.rootmode); root_dentry = d_make_root(root); if (!root_dentry) - goto err_put_conn; + goto err_dev_free; /* only now - we want root dentry with NULL ->d_op */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); if (!init_req) goto err_put_root; - init_req->background = 1; + __set_bit(FR_BACKGROUND, &init_req->flags); if (is_bdev) { fc->destroy_req = fuse_request_alloc(0); @@ -1079,8 +1134,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - fc->connected = 1; - file->private_data = fuse_conn_get(fc); + file->private_data = fud; mutex_unlock(&fuse_mutex); /* * atomic_dec_and_test() in fput() provides the necessary @@ -1099,6 +1153,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_request_free(init_req); err_put_root: dput(root_dentry); + err_dev_free: + fuse_dev_free(fud); err_put_conn: fuse_bdi_destroy(fc); fuse_conn_put(fc); @@ -1238,7 +1294,6 @@ static void fuse_fs_cleanup(void) } static struct kobject *fuse_kobj; -static struct kobject *connections_kobj; static int fuse_sysfs_init(void) { @@ -1250,11 +1305,9 @@ static int fuse_sysfs_init(void) goto out_err; } - connections_kobj = kobject_create_and_add("connections", fuse_kobj); - if (!connections_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fuse_kobj, "connections"); + if (err) goto out_fuse_unregister; - } return 0; @@ -1266,7 +1319,7 @@ static int fuse_sysfs_init(void) static void fuse_sysfs_cleanup(void) { - kobject_put(connections_kobj); + sysfs_remove_mount_point(fuse_kobj, "connections"); kobject_put(fuse_kobj); } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 95d255219b1e..1f1c7dcbcc2f 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -252,7 +252,7 @@ extern void hfs_mark_mdb_dirty(struct super_block *sb); #define __hfs_u_to_mtime(sec) cpu_to_be32(sec + 2082844800U - sys_tz.tz_minuteswest * 60) #define __hfs_m_to_utime(sec) (be32_to_cpu(sec) - 2082844800U + sys_tz.tz_minuteswest * 60) -#define HFS_I(inode) (list_entry(inode, struct hfs_inode_info, vfs_inode)) +#define HFS_I(inode) (container_of(inode, struct hfs_inode_info, vfs_inode)) #define HFS_SB(sb) ((struct hfs_sb_info *)(sb)->s_fs_info) #define hfs_m_to_utime(time) (struct timespec){ .tv_sec = __hfs_m_to_utime(time) } diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index b0441d65fa54..f91a1faf819e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -263,7 +263,7 @@ struct hfsplus_inode_info { static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) { - return list_entry(inode, struct hfsplus_inode_info, vfs_inode); + return container_of(inode, struct hfsplus_inode_info, vfs_inode); } /* diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index f005046e1591..d6a4b55d2ab0 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a a->btree.first_free = cpu_to_le16(8); return a; } + +static unsigned find_run(__le32 *bmp, unsigned *idx) +{ + unsigned len; + while (tstbits(bmp, *idx, 1)) { + (*idx)++; + if (unlikely(*idx >= 0x4000)) + return 0; + } + len = 1; + while (!tstbits(bmp, *idx + len, 1)) + len++; + return len; +} + +static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result) +{ + int err; + secno end; + if (fatal_signal_pending(current)) + return -EINTR; + end = start + len; + if (start < limit_start) + start = limit_start; + if (end > limit_end) + end = limit_end; + if (start >= end) + return 0; + if (end - start < minlen) + return 0; + err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0); + if (err) + return err; + *result += end - start; + return 0; +} + +int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result) +{ + int err = 0; + struct hpfs_sb_info *sbi = hpfs_sb(s); + unsigned idx, len, start_bmp, end_bmp; + __le32 *bmp; + struct quad_buffer_head qbh; + + *result = 0; + if (!end || end > sbi->sb_fs_size) + end = sbi->sb_fs_size; + if (start >= sbi->sb_fs_size) + return 0; + if (minlen > 0x4000) + return 0; + if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_1; + } + if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { + err = -EIO; + goto unlock_1; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_1: + hpfs_unlock(s); + } + start_bmp = start >> 14; + end_bmp = (end + 0x3fff) >> 14; + while (start_bmp < end_bmp && !err) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_2; + } + if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) { + err = -EIO; + goto unlock_2; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_2: + hpfs_unlock(s); + start_bmp++; + } + return err; +} diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2a8e07425de0..dc540bfcee1d 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops = .iterate = hpfs_readdir, .release = hpfs_dir_release, .fsync = hpfs_file_fsync, + .unlocked_ioctl = hpfs_ioctl, }; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 6d8cfe9b52d6..7ca28d604bf7 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops = .release = hpfs_file_release, .fsync = hpfs_file_fsync, .splice_read = generic_file_splice_read, + .unlocked_ioctl = hpfs_ioctl, }; const struct inode_operations hpfs_file_iops = diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b63b75fa00e7..c4867b5116dd 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,6 +18,8 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/slab.h> +#include <linux/sched.h> +#include <linux/blkdev.h> #include <asm/unaligned.h> #include "hpfs.h" @@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno); struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); +int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *); /* anode.c */ @@ -304,7 +307,7 @@ extern const struct address_space_operations hpfs_symlink_aops; static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) { - return list_entry(inode, struct hpfs_inode_info, vfs_inode); + return container_of(inode, struct hpfs_inode_info, vfs_inode); } static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) @@ -318,6 +321,7 @@ __printf(2, 3) void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_get_free_dnodes(struct super_block *); +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); /* * local time (HPFS) to GMT (Unix) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b..68a9bed05628 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); @@ -196,12 +199,39 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } + +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + switch (cmd) { + case FITRIM: { + struct fstrim_range range; + secno n_trimmed; + int r; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) + return -EFAULT; + r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed); + if (r) + return r; + range.len = (u64)n_trimmed << 9; + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) + return -EFAULT; + return 0; + } + default: { + return -ENOIOCTLCMD; + } + } +} + + static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; @@ -424,11 +454,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; diff --git a/fs/inode.c b/fs/inode.c index 069721f0cc0e..d30640f7a193 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -841,7 +841,11 @@ unsigned int get_next_ino(void) } #endif - *p = ++res; + res++; + /* get_next_ino should not provide a 0 inode number */ + if (unlikely(!res)) + res++; + *p = res; put_cpu_var(last_ino); return res; } @@ -1674,7 +1678,31 @@ int should_remove_suid(struct dentry *dentry) } EXPORT_SYMBOL(should_remove_suid); -static int __remove_suid(struct dentry *dentry, int kill) +/* + * Return mask of changes for notify_change() that need to be done as a + * response to write or truncate. Return 0 if nothing has to be changed. + * Negative value on error (change should be denied). + */ +int dentry_needs_remove_privs(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + int mask = 0; + int ret; + + if (IS_NOSEC(inode)) + return 0; + + mask = should_remove_suid(dentry); + ret = security_inode_need_killpriv(dentry); + if (ret < 0) + return ret; + if (ret) + mask |= ATTR_KILL_PRIV; + return mask; +} +EXPORT_SYMBOL(dentry_needs_remove_privs); + +static int __remove_privs(struct dentry *dentry, int kill) { struct iattr newattrs; @@ -1686,33 +1714,32 @@ static int __remove_suid(struct dentry *dentry, int kill) return notify_change(dentry, &newattrs, NULL); } -int file_remove_suid(struct file *file) +/* + * Remove special file priviledges (suid, capabilities) when file is written + * to or truncated. + */ +int file_remove_privs(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); - int killsuid; - int killpriv; + int kill; int error = 0; /* Fast path for nothing security related */ if (IS_NOSEC(inode)) return 0; - killsuid = should_remove_suid(dentry); - killpriv = security_inode_need_killpriv(dentry); - - if (killpriv < 0) - return killpriv; - if (killpriv) - error = security_inode_killpriv(dentry); - if (!error && killsuid) - error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; + kill = file_needs_remove_privs(file); + if (kill < 0) + return kill; + if (kill) + error = __remove_privs(dentry, kill); + if (!error) + inode_has_no_xattr(inode); return error; } -EXPORT_SYMBOL(file_remove_suid); +EXPORT_SYMBOL(file_remove_privs); /** * file_update_time - update mtime and ctime time @@ -1967,9 +1994,8 @@ EXPORT_SYMBOL(inode_dio_wait); * inode is being instantiated). The reason for the cmpxchg() loop * --- which wouldn't be necessary if all code paths which modify * i_flags actually followed this rule, is that there is at least one - * code path which doesn't today --- for example, - * __generic_file_aio_write() calls file_remove_suid() without holding - * i_mutex --- so we use cmpxchg() out of an abundance of caution. + * code path which doesn't today so we use cmpxchg() out of an abundance + * of caution. * * In the long run, i_mutex is overkill, and we should probably look * at using the i_lock spinlock to protect i_flags, and then make sure diff --git a/fs/internal.h b/fs/internal.h index 01dce1d1476b..4d5af583ab03 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); extern int open_check_o_direct(struct file *f); +extern int vfs_open(const struct path *, struct file *, const struct cred *); /* * inode.c diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index d200a9b8fd5e..824e61ede465 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -19,7 +19,7 @@ struct kstatfs; struct kvec; -#define JFFS2_INODE_INFO(i) (list_entry(i, struct jffs2_inode_info, vfs_inode)) +#define JFFS2_INODE_INFO(i) (container_of(i, struct jffs2_inode_info, vfs_inode)) #define OFNI_EDONI_2SFFJ(f) (&(f)->vfs_inode) #define JFFS2_SB_INFO(sb) (sb->s_fs_info) #define OFNI_BS_2SFFJ(c) ((struct super_block *)c->os_priv) diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 93a1232894f6..8db8b7d61e40 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; - case FITRIM: - cmd = FITRIM; - break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index fa7e795bd8ae..1f26d1910409 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -206,7 +206,7 @@ struct jfs_sb_info { static inline struct jfs_inode_info *JFS_IP(struct inode *inode) { - return list_entry(inode, struct jfs_inode_info, vfs_inode); + return container_of(inode, struct jfs_inode_info, vfs_inode); } static inline int jfs_dirtable_inline(struct inode *inode) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fffca9517321..2d48d28e1640 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn) goto out_unlock; ret = -ENOENT; + if (parent->flags & KERNFS_EMPTY_DIR) + goto out_unlock; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) goto out_unlock; @@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, return ERR_PTR(rc); } +/** + * kernfs_create_empty_dir - create an always empty directory + * @parent: parent in which to create a new directory + * @name: name of the new directory + * + * Returns the created node on success, ERR_PTR() value on failure. + */ +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name) +{ + struct kernfs_node *kn; + int rc; + + /* allocate */ + kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR); + if (!kn) + return ERR_PTR(-ENOMEM); + + kn->flags |= KERNFS_EMPTY_DIR; + kn->dir.root = parent->dir.root; + kn->ns = NULL; + kn->priv = NULL; + + /* link in */ + rc = kernfs_add_one(kn); + if (!rc) + return kn; + + kernfs_put(kn); + return ERR_PTR(rc); +} + static struct dentry *kernfs_iop_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, mutex_lock(&kernfs_mutex); error = -ENOENT; - if (!kernfs_active(kn) || !kernfs_active(new_parent)) + if (!kernfs_active(kn) || !kernfs_active(new_parent) || + (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; error = 0; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 2da8493a380b..756dd56aaf60 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) case KERNFS_DIR: inode->i_op = &kernfs_dir_iops; inode->i_fop = &kernfs_dir_fops; + if (kn->flags & KERNFS_EMPTY_DIR) + make_empty_dir_inode(inode); break; case KERNFS_FILE: inode->i_size = kn->attr.size; diff --git a/fs/libfs.c b/fs/libfs.c index 65e1feca8b98..102edfd39000 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,11 +20,6 @@ #include "internal.h" -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -1108,3 +1103,98 @@ const struct inode_operations simple_symlink_inode_operations = { .readlink = generic_readlink }; EXPORT_SYMBOL(simple_symlink_inode_operations); + +/* + * Operations for a permanently empty directory. + */ +static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = d_inode(dentry); + generic_fillattr(inode, stat); + return 0; +} + +static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) +{ + return -EPERM; +} + +static int empty_dir_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + return -EOPNOTSUPP; +} + +static int empty_dir_removexattr(struct dentry *dentry, const char *name) +{ + return -EOPNOTSUPP; +} + +static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) +{ + return -EOPNOTSUPP; +} + +static const struct inode_operations empty_dir_inode_operations = { + .lookup = empty_dir_lookup, + .permission = generic_permission, + .setattr = empty_dir_setattr, + .getattr = empty_dir_getattr, + .setxattr = empty_dir_setxattr, + .getxattr = empty_dir_getxattr, + .removexattr = empty_dir_removexattr, + .listxattr = empty_dir_listxattr, +}; + +static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) +{ + /* An empty directory has two entries . and .. at offsets 0 and 1 */ + return generic_file_llseek_size(file, offset, whence, 2, 2); +} + +static int empty_dir_readdir(struct file *file, struct dir_context *ctx) +{ + dir_emit_dots(file, ctx); + return 0; +} + +static const struct file_operations empty_dir_operations = { + .llseek = empty_dir_llseek, + .read = generic_read_dir, + .iterate = empty_dir_readdir, + .fsync = noop_fsync, +}; + + +void make_empty_dir_inode(struct inode *inode) +{ + set_nlink(inode, 2); + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_rdev = 0; + inode->i_size = 2; + inode->i_blkbits = PAGE_SHIFT; + inode->i_blocks = 0; + + inode->i_op = &empty_dir_inode_operations; + inode->i_fop = &empty_dir_operations; +} + +bool is_empty_dir_inode(struct inode *inode) +{ + return (inode->i_fop == &empty_dir_operations) && + (inode->i_op == &empty_dir_inode_operations); +} diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 118e4e7bc935..d19ac258105a 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -45,11 +45,6 @@ minix_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { struct address_space *mapping = page->mapping; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 1ebd11854622..01ad81dcacc5 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -84,7 +84,7 @@ static inline struct minix_sb_info *minix_sb(struct super_block *sb) static inline struct minix_inode_info *minix_i(struct inode *inode) { - return list_entry(inode, struct minix_inode_info, vfs_inode); + return container_of(inode, struct minix_inode_info, vfs_inode); } static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize) diff --git a/fs/namei.c b/fs/namei.c index 2dad0eaf91d3..ae4e4c18b2ac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -792,7 +792,7 @@ static void set_root(struct nameidata *nd) get_fs_root(current->fs, &nd->root); } -static unsigned set_root_rcu(struct nameidata *nd) +static void set_root_rcu(struct nameidata *nd) { struct fs_struct *fs = current->fs; unsigned seq; @@ -802,7 +802,6 @@ static unsigned set_root_rcu(struct nameidata *nd) nd->root = fs->root; nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); - return nd->root_seq; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -1998,7 +1997,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) if (*s == '/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); - nd->seq = set_root_rcu(nd); + set_root_rcu(nd); + nd->seq = nd->root_seq; } else { set_root(nd); path_get(&nd->root); diff --git a/fs/namespace.c b/fs/namespace.c index e99f1f4e00cd..c7cb8a526c05 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2343,6 +2343,8 @@ unlock: return err; } +static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -2374,6 +2376,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } + if (type->fs_flags & FS_USERNS_VISIBLE) { + if (!fs_fully_visible(type, &mnt_flags)) + return -EPERM; + } } mnt = vfs_kern_mount(type, flags, name, data); @@ -3175,9 +3181,10 @@ bool current_chrooted(void) return chrooted; } -bool fs_fully_visible(struct file_system_type *type) +static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) { struct mnt_namespace *ns = current->nsproxy->mnt_ns; + int new_flags = *new_mnt_flags; struct mount *mnt; bool visible = false; @@ -3196,16 +3203,36 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) continue; - /* This mount is not fully visible if there are any child mounts - * that cover anything except for empty directories. + /* Verify the mount flags are equal to or more permissive + * than the proposed new mount. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(new_flags & MNT_READONLY)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(new_flags & MNT_NODEV)) + continue; + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) + continue; + + /* This mount is not fully visible if there are any + * locked child mounts that cover anything except for + * empty directories. */ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; - if (!S_ISDIR(inode->i_mode)) - goto next; - if (inode->i_nlink > 2) + /* Only worry about locked mounts */ + if (!(mnt->mnt.mnt_flags & MNT_LOCKED)) + continue; + /* Is the directory permanetly empty? */ + if (!is_empty_dir_inode(inode)) goto next; } + /* Preserve the locked attributes */ + *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \ + MNT_LOCK_NODEV | \ + MNT_LOCK_ATIME); visible = true; goto found; next: ; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 80021c709af9..93575e91a7aa 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1145,6 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, case 0x00: ncp_dbg(1, "renamed %pd -> %pd\n", old_dentry, new_dentry); + ncp_d_prune(old_dentry); + ncp_d_prune(new_dentry); break; case 0x9E: error = -ENAMETOOLONG; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8d129bb7355a..682529c00996 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) * pg_authenticate method for nfsv4 callback threads. * * The authflavor has been negotiated, so an incorrect flavor is a server - * bug. Drop packets with incorrect authflavor. + * bug. Deny packets with incorrect authflavor. * * All other checking done after NFS decoding where the nfs_client can be * found in nfs4_callback_compound @@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) - return SVC_DROP; + return SVC_DENIED; break; case RPC_AUTH_GSS: /* No RPC_AUTH_GSS support yet in NFSv4.1 */ if (svc_is_backchannel(rqstp)) - return SVC_DROP; + return SVC_DENIED; } return SVC_OK; } diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 197806fb87ff..29e3c1b011b7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { - slot->seq_nr++; + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) goto out_ok; - } /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { @@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_process_state *cps) { struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) goto out; + tbl = &clp->cl_session->bc_slot_table; + slot = tbl->slots + args->csa_slotid; spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { - spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session * in order to reset it. */ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) status = htonl(NFS4ERR_BADSESSION); - goto out; + goto out_unlock; } - status = validate_seqid(&clp->cl_session->bc_slot_table, args); - spin_unlock(&tbl->slot_tbl_lock); + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + + status = validate_seqid(tbl, args); if (status) - goto out; + goto out_unlock; cps->slotid = args->csa_slotid; @@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, */ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { status = htonl(NFS4ERR_DELAY); - goto out; + goto out_unlock; } - memcpy(&res->csr_sessionid, &args->csa_sessionid, - sizeof(res->csr_sessionid)); - res->csr_sequenceid = args->csa_sequenceid; - res->csr_slotid = args->csa_slotid; - res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + /* + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + slot->seq_nr++; +out_unlock: + spin_unlock(&tbl->slot_tbl_lock); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 19ca95cdfd9b..6b1697a01dde 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r xdr_init_encode(&xdr_out, &rqstp->rq_res, p); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); - if (status == __constant_htonl(NFS4ERR_RESOURCE)) + if (status == htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; if (hdr_arg.minorversion == 0) { diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff3630..ecebb406cc1a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -825,7 +825,6 @@ error: * Load up the server record from information gained in an fsinfo record */ static void nfs_server_set_fsinfo(struct nfs_server *server, - struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo) { unsigned long max_rpc_payload; @@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs if (error < 0) goto out_error; - nfs_server_set_fsinfo(server, mntfh, &fsinfo); + nfs_server_set_fsinfo(server, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { @@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_fs_nfs; - static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); @@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) { struct nfs_server *server; struct nfs_client *clp; - char dev[8], fsid[17]; + char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0' + char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0' struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { - seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + seq_puts(m, "NV SERVER PORT DEV FSID" + " FSC\n"); return 0; } /* display one transport per line on subsequent lines */ server = list_entry(v, struct nfs_server, master_link); clp = server->nfs_client; - snprintf(dev, 8, "%u:%u", + snprintf(dev, sizeof(dev), "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - snprintf(fsid, 17, "%llx:%llx", + snprintf(fsid, sizeof(fsid), "%llx:%llx", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); rcu_read_lock(); - seq_printf(m, "v%u %s %s %-7s %-17s %s\n", + seq_printf(m, "v%u %s %s %-12s %-33s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), @@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net) */ int __init nfs_fs_proc_init(void) { - struct proc_dir_entry *p; - - proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); - if (!proc_fs_nfs) + if (!proc_mkdir("fs/nfsfs", NULL)) goto error_0; /* a file of servers with which we're dealing */ - p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); - if (!p) + if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers")) goto error_1; /* a file of volumes that we have mounted */ - p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); - if (!p) - goto error_2; - return 0; + if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes")) + goto error_1; -error_2: - remove_proc_entry("servers", proc_fs_nfs); + return 0; error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1464,9 +1456,7 @@ error_0: */ void nfs_fs_proc_exit(void) { - remove_proc_entry("volumes", proc_fs_nfs); - remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..547308a5ec6f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; - if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; - err = finish_open(file, dentry, do_open, opened); if (err) goto out; @@ -1771,7 +1768,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { - if (d_really_is_positive(dentry) && !d_unhashed(dentry)) + if (simple_positive(dentry)) d_delete(dentry); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6..c12951b9551e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) { - struct nfs4_ff_layout_mirror *tmp; int i, j; for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (j = i + 1; j < fls->mirror_array_cnt; j++) if (fls->mirror_array[i]->efficiency < - fls->mirror_array[j]->efficiency) { - tmp = fls->mirror_array[i]; - fls->mirror_array[i] = fls->mirror_array[j]; - fls->mirror_array[j] = tmp; - } + fls->mirror_array[j]->efficiency) + swap(fls->mirror_array[i], + fls->mirror_array[j]); } } @@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->gid); } + p = xdr_inline_decode(&stream, 4); + if (p) + fls->flags = be32_to_cpup(p); + ff_layout_sort_mirrors(fls); rc = ff_layout_check_layout(lgr); if (rc) @@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + + now = ktime_get(); + start = timer->start_time; + timer->start_time = now; + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static bool +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) +{ + static const ktime_t notime = {0}; + ktime_t now = ktime_get(); + + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + bool report; + + spin_lock(&mirror->lock); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -631,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } @@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; } +static bool +ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg) +{ + return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT); +} + /* * We reference the rpc_cred of the first WRITE that triggers the need for * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. @@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { + if (!ff_layout_need_layoutcommit(hdr->lseg)) + return; + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, @@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return -EAGAIN; } - if (data->verf.committed == NFS_UNSTABLE) + if (data->verf.committed == NFS_UNSTABLE + && ff_layout_need_layoutcommit(data->lseg)) pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; @@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, @@ -1488,6 +1687,247 @@ out: dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d..f92f9a0a856b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -9,12 +9,17 @@ #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H #define FS_NFS_NFS4FLEXFILELAYOUT_H +#define FF_FLAGS_NO_LAYOUTCOMMIT 1 + #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; + u32 flags; u32 mirror_array_cnt; struct nfs4_ff_layout_mirror **mirror_array; }; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12..f13e1969eedd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; @@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d24..b77b328a06d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + if (S_ISDIR(inode->i_mode)) + stat->blksize = NFS_SERVER(inode)->dtsize; } out: trace_nfs_getattr_exit(inode, err); @@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); -#endif - if ((err = register_nfs_fs()) != 0) + + err = register_nfs_fs(); + if (err) goto out0; return 0; out0: -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88b..9b04c2e6fffc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include <linux/nfs_fs.h> #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + switch (task->tk_status) { + case 0: + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; + default: + dprintk("%s server returns %d\n", __func__, task->tk_status); + } +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + +static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..a6bd27da6286 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,12 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + return decode_op_hdr(xdr, OP_LAYOUTSTATS); +} + /* * Decode ALLOCATE request */ @@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18..3aa6a9ba5113 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -676,7 +676,6 @@ found: break; } - /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); nfs_put_client(prev); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f58c17b3b480..dcd39d4e2efe 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp) dprintk("NFS: open file(%pd2)\n", dentry); + err = nfs_check_flags(openflags); + if (err) + return err; + if ((openflags & O_ACCMODE) == 3) openflags--; diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index c0b3a16b4a00..039b3eb6d834 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p goto out; } - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); out: nfs_free_fattr(fsinfo.fattr); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e1737c40a29..535dfc69c628 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp) int nfs_idmap_init(void) { - int ret; - ret = nfs_idmap_init_keyring(); - if (ret != 0) - goto out; -out: - return ret; + return nfs_idmap_init_keyring(); } void nfs_idmap_quit(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..6f228b5af819 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case 0: return 0; case -NFS4ERR_OPENMODE: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs4_inode_return_delegation(inode); exception->retry = 1; @@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { ret = nfs4_schedule_stateid_recovery(server, state); @@ -482,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; @@ -1553,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod struct nfs4_state *newstate; int ret; + if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || + opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && + (opendata->o_arg.u.delegation_type & fmode) != fmode) + /* This mode can't have been delegated, so we must have + * a valid open_stateid to cover it - not need to reclaim. + */ + return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; opendata->o_arg.share_access = nfs4_map_atomic_open_share( @@ -1684,6 +1685,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct "%d.\n", __func__, err); case 0: case -ENOENT: + case -EAGAIN: case -ESTALE: break; case -NFS4ERR_BADSESSION: @@ -3355,6 +3357,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, goto out; case -NFS4ERR_MOVED: err = nfs4_get_referral(client, dir, name, fattr, fhandle); + if (err == -NFS4ERR_MOVED) + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); goto out; case -NFS4ERR_WRONGSEC: err = -EPERM; @@ -4955,49 +4959,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -static unsigned int -nfs4_init_nonuniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_nonuniform_client_string(struct nfs_client *clp) { - unsigned int result; + int result; + size_t len; + char *str; + bool retried = false; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; +retry: + rcu_read_lock(); + len = 10 + strlen(clp->cl_ipaddr) + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + + 1; + rcu_read_unlock(); + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; rcu_read_lock(); - result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); rcu_read_unlock(); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + + /* Did something change? */ + if (result >= len) { + kfree(str); + if (retried) + return -EINVAL; + retried = true; + goto retry; + } + clp->cl_owner_id = str; + return 0; } -static unsigned int -nfs4_init_uniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_uniquifier_client_string(struct nfs_client *clp) +{ + int result; + size_t len; + char *str; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(nfs4_client_id_uniquifier) + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) { - const char *nodename = clp->cl_rpcclient->cl_nodename; - unsigned int result; + int result; + size_t len; + char *str; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; if (nfs4_client_id_uniquifier[0] != '\0') - result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, - clp->cl_minorversion, - nfs4_client_id_uniquifier, - nodename); - else - result = scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + return nfs4_init_uniquifier_client_string(clp); + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; } /* @@ -5044,7 +5127,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -5064,16 +5147,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_uniform_client_string(clp); else - setclientid.sc_name_len = - nfs4_init_nonuniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_nonuniform_client_string(clp); + + if (status) + goto out; + /* cb_client4 */ setclientid.sc_netid_len = nfs4_init_callback_netid(clp, @@ -5083,9 +5165,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); + clp->cl_owner_id); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { status = PTR_ERR(task); @@ -5402,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5413,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); + fput(calldata->fl.fl_file); kfree(calldata); } @@ -6846,11 +6930,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + + status = nfs4_init_uniform_client_string(clp); + if (status) + goto out; + + dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + clp->cl_owner_id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); @@ -6885,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* unsupported! */ WARN_ON_ONCE(1); status = -EINVAL; - goto out_server_scope; + goto out_impl_id; } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); @@ -6913,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; + res.impl_id = NULL; if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, @@ -6926,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (clp->cl_serverscope == NULL) { clp->cl_serverscope = res.server_scope; - goto out; + res.server_scope = NULL; } - } else - kfree(res.impl_id); + } -out_server_owner: - kfree(res.server_owner); +out_impl_id: + kfree(res.impl_id); out_server_scope: kfree(res.server_scope); +out_server_owner: + kfree(res.server_owner); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " @@ -8061,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) struct rpc_task *task; int status = 0; - dprintk("NFS: %4d initiating layoutcommit call. sync %d " - "lbw: %llu inode %lu\n", - data->task.tk_pid, sync, + dprintk("NFS: initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", sync, data->args.lastbytewritten, data->args.inode->i_ino); @@ -8557,7 +8645,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE - | NFS_CAP_SEEK, + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca2265..605840dc89cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; @@ -1482,6 +1481,8 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } @@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; + nfs4_begin_drain_session(clp); cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int); #define encode_setclientid_maxsz \ (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ - XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + /* client name */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* sc_prog */ + \ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ @@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int); #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ 1 /* co_ownerid.len */ + \ - XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + /* eia_clientowner */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ /* max is SP4_MACH_CRED (for now) */ + \ @@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); - encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), + setclientid->sc_clnt->cl_owner_id); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_prog); encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); - *p = cpu_to_be32(setclientid->sc_cb_ident); + *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); - encode_string(xdr, args->id_len, args->id); + encode_string(xdr, strlen(args->client->cl_owner_id), + args->client->cl_owner_id); encode_uint32(xdr, args->flags); encode_uint32(xdr, args->state_protect.how); @@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..1da68d3b1eda 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); - dprintk("NFS: %5u initiated pgio call " + dprintk("NFS: initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), hdr->args.count, @@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - if (hdr->rw_ops->rw_release) - hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } @@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor * @inode: pointer to inode - * @doio: pointer to io function + * @pg_ops: pointer to pageio operations + * @compl_ops: pointer to pageio completion operations + * @rw_ops: pointer to nfs read/write operations * @bsize: io block size * @io_flags: extra parameters for the io function */ @@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an * nfs_pageio_descriptor * @desc: pointer to io descriptor + * @mirror_idx: pointer to mirror index */ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); @@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* @@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +#if IS_ENABLED(CONFIG_NFS_V4_2) +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) + goto out; + + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6c262555e08..65869ca9c851 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); @@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) -{ - /* do nothing! */ -} - /* * Special version of should_remove_suid() that ignores capabilities. */ @@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg); - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + dprintk("NFS: initiated commit call\n"); nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); @@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, - .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, .rw_initiate = nfs_initiate_write, diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 0ee0bed3649b..6b8b92b19cec 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -61,11 +61,6 @@ static inline void nilfs_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 258d9fe2521a..4a73d6dffabf 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -307,31 +307,13 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, static ssize_t nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = file->f_mapping->host; - size_t count = iov_iter_count(iter); - ssize_t size; + struct inode *inode = file_inode(iocb->ki_filp); if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); - - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. - */ - if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + count; - - if (end > isize) - nilfs_write_failed(mapping, end); - } - - return size; + return blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); } const struct address_space_operations nilfs_aops = { diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9a20e513d7eb..aba43811d6ef 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SYNC: case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: - case FITRIM: break; default: return -ENOIOCTLCMD; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 2cd653670764..262561fea923 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -382,7 +382,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, base_ni = ni; if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; - err = file_remove_suid(file); + err = file_remove_privs(file); if (unlikely(err)) goto out; /* diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 76b6cfb579d7..b3c3469de6cb 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -239,7 +239,7 @@ typedef struct { */ static inline ntfs_inode *NTFS_I(struct inode *inode) { - return (ntfs_inode *)list_entry(inode, big_ntfs_inode, vfs_inode); + return (ntfs_inode *)container_of(inode, big_ntfs_inode, vfs_inode); } static inline struct inode *VFS_I(ntfs_inode *ni) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 53e6c40ed4c6..3cb097ccce60 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: - case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) diff --git a/fs/open.c b/fs/open.c index e0250bdcc440..e33dab287fa0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -51,8 +51,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ATTR_FILE; } - /* Remove suid/sgid on truncate too */ - ret = should_remove_suid(dentry); + /* Remove suid, sgid, and file capabilities on truncate too */ + ret = dentry_needs_remove_privs(dentry); + if (ret < 0) + return ret; if (ret) newattrs.ia_valid |= ret | ATTR_FORCE; @@ -678,18 +680,18 @@ int open_check_o_direct(struct file *f) } static int do_dentry_open(struct file *f, + struct inode *inode, int (*open)(struct inode *, struct file *), const struct cred *cred) { static const struct file_operations empty_fops = {}; - struct inode *inode; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; path_get(&f->f_path); - inode = f->f_inode = f->f_path.dentry->d_inode; + f->f_inode = inode; f->f_mapping = inode->i_mapping; if (unlikely(f->f_flags & O_PATH)) { @@ -793,7 +795,8 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, open, current_cred()); + error = do_dentry_open(file, d_backing_inode(dentry), open, + current_cred()); if (!error) *opened |= FILE_OPENED; @@ -822,6 +825,34 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); +char *file_path(struct file *filp, char *buf, int buflen) +{ + return d_path(&filp->f_path, buf, buflen); +} +EXPORT_SYMBOL(file_path); + +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *file, + const struct cred *cred) +{ + struct dentry *dentry = path->dentry; + struct inode *inode = dentry->d_inode; + + file->f_path = *path; + if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { + inode = dentry->d_op->d_select_inode(dentry, file->f_flags); + if (IS_ERR(inode)) + return PTR_ERR(inode); + } + + return do_dentry_open(file, inode, NULL, cred); +} + struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { @@ -853,26 +884,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -/** - * vfs_open - open the file at the given path - * @path: path to open - * @filp: newly allocated file with f_flag initialized - * @cred: credentials to use - */ -int vfs_open(const struct path *path, struct file *filp, - const struct cred *cred) -{ - struct inode *inode = path->dentry->d_inode; - - if (inode->i_op->dentry_open) - return inode->i_op->dentry_open(path->dentry, filp, cred); - else { - filp->f_path = *path; - return do_dentry_open(filp, NULL, cred); - } -} -EXPORT_SYMBOL(vfs_open); - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 308379b2d0b2..d9da5a4e9382 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -337,37 +337,33 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, return true; } -static int ovl_dentry_open(struct dentry *dentry, struct file *file, - const struct cred *cred) +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) { int err; struct path realpath; enum ovl_path_type type; - bool want_write = false; + + if (d_is_dir(dentry)) + return d_backing_inode(dentry); type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { - want_write = true; + if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); if (err) - goto out; + return ERR_PTR(err); - if (file->f_flags & O_TRUNC) + if (file_flags & O_TRUNC) err = ovl_copy_up_last(dentry, NULL, true); else err = ovl_copy_up(dentry); + ovl_drop_write(dentry); if (err) - goto out_drop_write; + return ERR_PTR(err); ovl_path_upper(dentry, &realpath); } - err = vfs_open(&realpath, file, cred); -out_drop_write: - if (want_write) - ovl_drop_write(dentry); -out: - return err; + return d_backing_inode(realpath.dentry); } static const struct inode_operations ovl_file_inode_operations = { @@ -378,7 +374,6 @@ static const struct inode_operations ovl_file_inode_operations = { .getxattr = ovl_getxattr, .listxattr = ovl_listxattr, .removexattr = ovl_removexattr, - .dentry_open = ovl_dentry_open, }; static const struct inode_operations ovl_symlink_inode_operations = { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 17ac5afc9ffb..ea5a40b06e3a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); +struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, struct ovl_entry *oe); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 907870e81a72..70e9af551600 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -23,6 +23,7 @@ struct ovl_cache_entry { u64 ino; struct list_head l_node; struct rb_node node; + struct ovl_cache_entry *next_maybe_whiteout; bool is_whiteout; char name[]; }; @@ -39,7 +40,7 @@ struct ovl_readdir_data { struct rb_root root; struct list_head *list; struct list_head middle; - struct dentry *dir; + struct ovl_cache_entry *first_maybe_whiteout; int count; int err; }; @@ -79,7 +80,7 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, return NULL; } -static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, +static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd, const char *name, int len, u64 ino, unsigned int d_type) { @@ -98,29 +99,8 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, p->is_whiteout = false; if (d_type == DT_CHR) { - struct dentry *dentry; - const struct cred *old_cred; - struct cred *override_cred; - - override_cred = prepare_creds(); - if (!override_cred) { - kfree(p); - return NULL; - } - - /* - * CAP_DAC_OVERRIDE for lookup - */ - cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); - old_cred = override_creds(override_cred); - - dentry = lookup_one_len(name, dir, len); - if (!IS_ERR(dentry)) { - p->is_whiteout = ovl_is_whiteout(dentry); - dput(dentry); - } - revert_creds(old_cred); - put_cred(override_cred); + p->next_maybe_whiteout = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p; } return p; } @@ -148,7 +128,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, return 0; } - p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + p = ovl_cache_entry_new(rdd, name, len, ino, d_type); if (p == NULL) return -ENOMEM; @@ -169,7 +149,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, if (p) { list_move_tail(&p->l_node, &rdd->middle); } else { - p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type); if (p == NULL) rdd->err = -ENOMEM; else @@ -219,6 +199,43 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name, return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); } +static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) +{ + int err; + struct ovl_cache_entry *p; + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + err = mutex_lock_killable(&dir->d_inode->i_mutex); + if (!err) { + while (rdd->first_maybe_whiteout) { + p = rdd->first_maybe_whiteout; + rdd->first_maybe_whiteout = p->next_maybe_whiteout; + dentry = lookup_one_len(p->name, dir, p->len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + } + mutex_unlock(&dir->d_inode->i_mutex); + } + revert_creds(old_cred); + put_cred(override_cred); + + return err; +} + static inline int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd) { @@ -229,7 +246,7 @@ static inline int ovl_dir_read(struct path *realpath, if (IS_ERR(realfile)) return PTR_ERR(realfile); - rdd->dir = realpath->dentry; + rdd->first_maybe_whiteout = NULL; rdd->ctx.pos = 0; do { rdd->count = 0; @@ -238,6 +255,10 @@ static inline int ovl_dir_read(struct path *realpath, if (err >= 0) err = rdd->err; } while (!err && rdd->count); + + if (!err && rdd->first_maybe_whiteout) + err = ovl_check_whiteouts(realpath->dentry, rdd); + fput(realfile); return err; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bf8537c7f455..7466ff339c66 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -273,8 +273,56 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_REVALIDATE) { + ret = d->d_op->d_revalidate(d, flags); + if (ret < 0) + return ret; + if (!ret) { + if (!(flags & LOOKUP_RCU)) + d_invalidate(d); + return -ESTALE; + } + } + } + return 1; +} + +static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct ovl_entry *oe = dentry->d_fsdata; + unsigned int i; + int ret = 1; + + for (i = 0; i < oe->numlower; i++) { + struct dentry *d = oe->lowerstack[i].dentry; + + if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { + ret = d->d_op->d_weak_revalidate(d, flags); + if (ret <= 0) + break; + } + } + return ret; +} + static const struct dentry_operations ovl_dentry_operations = { .d_release = ovl_dentry_release, + .d_select_inode = ovl_d_select_inode, +}; + +static const struct dentry_operations ovl_reval_dentry_operations = { + .d_release = ovl_dentry_release, + .d_revalidate = ovl_dentry_revalidate, + .d_weak_revalidate = ovl_dentry_weak_revalidate, }; static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) @@ -288,6 +336,20 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) return oe; } +static bool ovl_dentry_remote(struct dentry *dentry) +{ + return dentry->d_flags & + (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); +} + +static bool ovl_dentry_weird(struct dentry *dentry) +{ + return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | + DCACHE_MANAGE_TRANSIT | + DCACHE_OP_HASH | + DCACHE_OP_COMPARE); +} + static inline struct dentry *ovl_lookup_real(struct dentry *dir, struct qstr *name) { @@ -303,6 +365,10 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, } else if (!dentry->d_inode) { dput(dentry); dentry = NULL; + } else if (ovl_dentry_weird(dentry)) { + dput(dentry); + /* Don't support traversing automounts and other weirdness */ + dentry = ERR_PTR(-EREMOTE); } return dentry; } @@ -350,6 +416,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out; if (this) { + if (unlikely(ovl_dentry_remote(this))) { + dput(this); + err = -EREMOTE; + goto out; + } if (ovl_is_whiteout(this)) { dput(this); this = NULL; @@ -694,25 +765,6 @@ static void ovl_unescape(char *s) } } -static bool ovl_is_allowed_fs_type(struct dentry *root) -{ - const struct dentry_operations *dop = root->d_op; - - /* - * We don't support: - * - automount filesystems - * - filesystems with revalidate (FIXME for lower layer) - * - filesystems with case insensitive names - */ - if (dop && - (dop->d_manage || dop->d_automount || - dop->d_revalidate || dop->d_weak_revalidate || - dop->d_compare || dop->d_hash)) { - return false; - } - return true; -} - static int ovl_mount_dir_noesc(const char *name, struct path *path) { int err = -EINVAL; @@ -727,7 +779,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) goto out; } err = -EINVAL; - if (!ovl_is_allowed_fs_type(path->dentry)) { + if (ovl_dentry_weird(path->dentry)) { pr_err("overlayfs: filesystem on '%s' not supported\n", name); goto out_put; } @@ -751,13 +803,21 @@ static int ovl_mount_dir(const char *name, struct path *path) if (tmp) { ovl_unescape(tmp); err = ovl_mount_dir_noesc(tmp, path); + + if (!err) + if (ovl_dentry_remote(path->dentry)) { + pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", + tmp); + path_put(path); + err = -EINVAL; + } kfree(tmp); } return err; } static int ovl_lower_dir(const char *name, struct path *path, long *namelen, - int *stack_depth) + int *stack_depth, bool *remote) { int err; struct kstatfs statfs; @@ -774,6 +834,9 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen, *namelen = max(*namelen, statfs.f_namelen); *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); + if (ovl_dentry_remote(path->dentry)) + *remote = true; + return 0; out_put: @@ -827,6 +890,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int numlower; unsigned int stacklen = 0; unsigned int i; + bool remote = false; int err; err = -ENOMEM; @@ -900,7 +964,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) lower = lowertmp; for (numlower = 0; numlower < stacklen; numlower++) { err = ovl_lower_dir(lower, &stack[numlower], - &ufs->lower_namelen, &sb->s_stack_depth); + &ufs->lower_namelen, &sb->s_stack_depth, + &remote); if (err) goto out_put_lowerpath; @@ -958,7 +1023,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; - sb->s_d_op = &ovl_dentry_operations; + if (remote) + sb->s_d_op = &ovl_reval_dentry_operations; + else + sb->s_d_op = &ovl_dentry_operations; err = -ENOMEM; oe = ovl_alloc_entry(numlower); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 84bb65b83570..4fb17ded7d47 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -547,51 +547,45 @@ posix_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { struct posix_acl *p; + struct posix_acl *clone; int ret; + *acl = NULL; + *default_acl = NULL; + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) - goto no_acl; + return 0; p = get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(p)) { - if (p == ERR_PTR(-EOPNOTSUPP)) - goto apply_umask; - return PTR_ERR(p); + if (!p || p == ERR_PTR(-EOPNOTSUPP)) { + *mode &= ~current_umask(); + return 0; } + if (IS_ERR(p)) + return PTR_ERR(p); - if (!p) - goto apply_umask; - - *acl = posix_acl_clone(p, GFP_NOFS); - if (!*acl) + clone = posix_acl_clone(p, GFP_NOFS); + if (!clone) goto no_mem; - ret = posix_acl_create_masq(*acl, mode); + ret = posix_acl_create_masq(clone, mode); if (ret < 0) goto no_mem_clone; - if (ret == 0) { - posix_acl_release(*acl); - *acl = NULL; - } + if (ret == 0) + posix_acl_release(clone); + else + *acl = clone; - if (!S_ISDIR(*mode)) { + if (!S_ISDIR(*mode)) posix_acl_release(p); - *default_acl = NULL; - } else { + else *default_acl = p; - } - return 0; -apply_umask: - *mode &= ~current_umask(); -no_acl: - *default_acl = NULL; - *acl = NULL; return 0; no_mem_clone: - posix_acl_release(*acl); + posix_acl_release(clone); no_mem: posix_acl_release(p); return -ENOMEM; diff --git a/fs/proc/base.c b/fs/proc/base.c index 1d540b3f226f..87782e874b6a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -491,14 +491,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, } #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - seq_printf(m, "%llu %llu %lu\n", + if (unlikely(!sched_info_on())) + seq_printf(m, "0 0 0\n"); + else + seq_printf(m, "%llu %llu %lu\n", (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); @@ -2787,7 +2790,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP @@ -3135,7 +3138,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP diff --git a/fs/proc/generic.c b/fs/proc/generic.c index df6327a2b865..e5dee5c3188e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, WARN(1, "create '/proc/%s' by hand\n", qstr.name); return NULL; } + if (is_empty_pde(*parent)) { + WARN(1, "attempt to add to permanently empty directory"); + return NULL; + } ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); if (!ent) @@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, } EXPORT_SYMBOL(proc_mkdir); +struct proc_dir_entry *proc_create_mount_point(const char *name) +{ + umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, name, mode, 2); + if (ent) { + ent->data = NULL; + ent->proc_fops = NULL; + ent->proc_iops = NULL; + if (proc_register(parent, ent) < 0) { + kfree(ent); + parent->nlink--; + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index afe232b9df6e..bd95b9fdebb0 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -422,6 +422,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; + } if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c835b94c0cd3..aa2781095bd1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } extern void pde_put(struct proc_dir_entry *); +static inline bool is_empty_pde(const struct proc_dir_entry *pde) +{ + return S_ISDIR(pde->mode) && !pde->proc_iops; +} +struct proc_dir_entry *proc_create_mount_point(const char *name); + /* * inode.c */ diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index d4a35746cab9..f8595e8b5cd0 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -64,7 +64,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } seq_putc(m, '\n'); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fea2561d773b..fdda62e6115e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* Support for permanently empty directories */ + +struct ctl_table sysctl_mount_point[] = { + { } +}; + +static bool is_empty_dir(struct ctl_table_header *head) +{ + return head->ctl_table[0].child == sysctl_mount_point; +} + +static void set_empty_dir(struct ctl_dir *dir) +{ + dir->header.ctl_table[0].child = sysctl_mount_point; +} + +static void clear_empty_dir(struct ctl_dir *dir) + +{ + dir->header.ctl_table[0].child = NULL; +} + void proc_sys_poll_notify(struct ctl_table_poll *poll) { if (!poll) @@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) struct ctl_table *entry; int err; + /* Is this a permanently empty directory? */ + if (is_empty_dir(&dir->header)) + return -EROFS; + + /* Am I creating a permanently empty directory? */ + if (header->ctl_table == sysctl_mount_point) { + if (!RB_EMPTY_ROOT(&dir->root)) + return -EINVAL; + set_empty_dir(dir); + } + dir->header.nreg++; header->parent = dir; err = insert_links(header); @@ -202,6 +235,8 @@ fail: erase_header(header); put_links(header); fail_links: + if (header->ctl_table == sysctl_mount_point) + clear_empty_dir(dir); header->parent = NULL; drop_sysctl_table(&dir->header); return err; @@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mode |= S_IFDIR; inode->i_op = &proc_sys_dir_operations; inode->i_fop = &proc_sys_dir_file_operations; + if (is_empty_dir(head)) + make_empty_dir_inode(inode); } out: return inode; diff --git a/fs/proc/root.c b/fs/proc/root.c index b7fa4bfe896a..68feb0f70e63 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - /* Does the mounter have privilege over the pid namespace? */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -182,10 +179,10 @@ void __init proc_root_init(void) #endif proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); - proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ + proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ - proc_mkdir("openprom", NULL); + proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6dee68d013ff..ca1e091881d4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -310,7 +310,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) */ if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, "\n"); + seq_file_path(m, file, "\n"); goto done; } @@ -1509,7 +1509,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (file) { seq_puts(m, " file="); - seq_path(m, &file->f_path, "\n\t= "); + seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); } else { diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 599ec2e20104..e0d64c92e4f6 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -180,7 +180,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); - seq_path(m, &file->f_path, ""); + seq_file_path(m, file, ""); } else if (mm) { pid_t tid = pid_of_stack(priv, vma, is_pid); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index dc43b5f29305..3adcc4669fac 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { .kill_sb = pstore_kill_sb, }; -static struct kobject *pstore_kobj; - static int __init init_pstore_fs(void) { - int err = 0; + int err; /* Create a convenient mount point for people to access pstore */ - pstore_kobj = kobject_create_and_add("pstore", fs_kobj); - if (!pstore_kobj) { - err = -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "pstore"); + if (err) goto out; - } err = register_filesystem(&pstore_fs_type); if (err < 0) - kobject_put(pstore_kobj); + sysfs_remove_mount_point(fs_kobj, "pstore"); out: return err; diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index 8d64bb5366bf..e1f37278cf97 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -32,11 +32,6 @@ static struct page *qnx6_get_page(struct inode *dir, unsigned long n) return page; } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static unsigned last_entry(struct inode *inode, unsigned long page_nr) { unsigned long last_byte = inode->i_size; diff --git a/fs/seq_file.c b/fs/seq_file.c index 1d9c1cbd4d0b..ce9e39fd5daf 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -490,6 +490,20 @@ int seq_path(struct seq_file *m, const struct path *path, const char *esc) } EXPORT_SYMBOL(seq_path); +/** + * seq_file_path - seq_file interface to print a pathname of a file + * @m: the seq_file handle + * @file: the struct file to print + * @esc: set of characters to escape in the output + * + * return the absolute path to the file. + */ +int seq_file_path(struct seq_file *m, struct file *file, const char *esc) +{ + return seq_path(m, &file->f_path, esc); +} +EXPORT_SYMBOL(seq_file_path); + /* * Same as seq_path, but relative to supplied root. */ diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index 73588e7700ed..d09fcd6fb85d 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h @@ -49,6 +49,6 @@ struct squashfs_inode_info { static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) { - return list_entry(inode, struct squashfs_inode_info, vfs_inode); + return container_of(inode, struct squashfs_inode_info, vfs_inode); } #endif diff --git a/fs/super.c b/fs/super.c index 928c20f47af9..b61372354f2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -842,7 +842,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if (dev == (1 << MINORBITS)) { + if (dev >= (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0b45ff42f374..94374e435025 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); } + +/** + * sysfs_create_mount_point - create an always empty directory + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to add + */ +int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *kn, *parent = parent_kobj->sd; + + kn = kernfs_create_empty_dir(parent, name); + if (IS_ERR(kn)) { + if (PTR_ERR(kn) == -EEXIST) + sysfs_warn_dup(parent, name); + return PTR_ERR(kn); + } + + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_create_mount_point); + +/** + * sysfs_remove_mount_point - remove an always empty directory. + * @parent_kobj: kobject that will contain this always empty directory + * @name: The name of the always empty directory to remove + * + */ +void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name) +{ + struct kernfs_node *parent = parent_kobj->sd; + + kernfs_remove_by_name_ns(parent, name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_mount_point); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8a49486bf30c..1c6ac6fcee9f 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, bool new_sb; if (!(flags & MS_KERNMOUNT)) { - if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) - return ERR_PTR(-EPERM); - if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) return ERR_PTR(-EPERM); } @@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, }; int __init sysfs_init(void) diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 8f3555f00c54..63c1bcb224ee 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -33,11 +33,6 @@ static inline void dir_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { struct address_space *mapping = page->mapping; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 2c13525131cd..6c212288adcb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -73,7 +73,7 @@ struct sysv_inode_info { static inline struct sysv_inode_info *SYSV_I(struct inode *inode) { - return list_entry(inode, struct sysv_inode_info, vfs_inode); + return container_of(inode, struct sysv_inode_info, vfs_inode); } static inline struct sysv_sb_info *SYSV_SB(struct super_block *sb) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a..cbc8d5d2755a 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -496,16 +496,11 @@ struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *pare return dentry; } -static inline int tracefs_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (tracefs_positive(dentry)) { + if (simple_positive(dentry)) { if (dentry->d_inode) { dget(dentry); switch (dentry->d_inode->i_mode & S_IFMT) { @@ -582,7 +577,7 @@ void tracefs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!tracefs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -603,7 +598,7 @@ void tracefs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * tracefs_positive() check. + * simple_positive() check. */ goto loop; } @@ -631,14 +626,12 @@ bool tracefs_initialized(void) return tracefs_registered; } -static struct kobject *trace_kobj; - static int __init tracefs_init(void) { int retval; - trace_kobj = kobject_create_and_add("tracing", kernel_kobj); - if (!trace_kobj) + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); + if (retval) return -EINVAL; retval = register_filesystem(&trace_fs_type); diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index b5cd8ed2aa12..b1b9a63d8cf3 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -56,7 +56,7 @@ struct udf_inode_info { static inline struct udf_inode_info *UDF_I(struct inode *inode) { - return list_entry(inode, struct udf_inode_info, vfs_inode); + return container_of(inode, struct udf_inode_info, vfs_inode); } #endif /* _UDF_I_H) */ diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 2c1036080d52..a7106eda5024 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -51,8 +51,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (ufs_fragnum(fragment) + count > uspi->s_fpg) ufs_error (sb, "ufs_free_fragments", "internal error"); - - lock_ufs(sb); + + mutex_lock(&UFS_SB(sb)->s_lock); cgno = ufs_dtog(uspi, fragment); bit = ufs_dtogd(uspi, fragment); @@ -115,13 +115,13 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - - unlock_ufs(sb); + + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return; } @@ -151,7 +151,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) goto failed; } - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); do_more: overflow = 0; @@ -211,12 +211,12 @@ do_more: } ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); return; failed_unlock: - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); failed: UFSD("EXIT (FAILED)\n"); return; @@ -357,7 +357,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, usb1 = ubh_get_usb_first(uspi); *err = -ENOSPC; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); tmp = ufs_data_ptr_to_cpu(sb, p); if (count + ufs_fragnum(fragment) > uspi->s_fpb) { @@ -378,19 +378,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, "fragment %llu, tmp %llu\n", (unsigned long long)fragment, (unsigned long long)tmp); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return INVBLOCK; } if (fragment < UFS_I(inode)->i_lastfrag) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } else { if (tmp) { UFSD("EXIT (ALREADY ALLOCATED)\n"); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return 0; } } @@ -399,7 +399,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, * There is not enough space for user on the device */ if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } @@ -424,7 +424,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -439,7 +439,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, fragment + count); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); return result; } @@ -477,7 +477,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -485,7 +485,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return result; } - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT (FAILED)\n"); return 0; } diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 1bfe8cabff0f..74f2e80288bf 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -65,11 +65,6 @@ static inline void ufs_put_page(struct page *page) page_cache_release(page); } -static inline unsigned long ufs_dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr) { ino_t res = 0; @@ -87,7 +82,8 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr) /* Releases the page */ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, - struct page *page, struct inode *inode) + struct page *page, struct inode *inode, + bool update_times) { loff_t pos = page_offset(page) + (char *) de - (char *) page_address(page); @@ -103,7 +99,8 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, err = ufs_commit_chunk(page, pos, len); ufs_put_page(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + if (update_times) + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); } @@ -256,7 +253,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr, int namelen = qstr->len; unsigned reclen = UFS_DIR_REC_LEN(namelen); unsigned long start, n; - unsigned long npages = ufs_dir_pages(dir); + unsigned long npages = dir_pages(dir); struct page *page = NULL; struct ufs_inode_info *ui = UFS_I(dir); struct ufs_dir_entry *de; @@ -320,7 +317,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) unsigned short rec_len, name_len; struct page *page = NULL; struct ufs_dir_entry *de; - unsigned long npages = ufs_dir_pages(dir); + unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; loff_t pos; @@ -437,7 +434,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; - unsigned long npages = ufs_dir_pages(inode); + unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); int need_revalidate = file->f_version != inode->i_version; unsigned flags = UFS_SB(sb)->s_flags; @@ -608,7 +605,7 @@ int ufs_empty_dir(struct inode * inode) { struct super_block *sb = inode->i_sb; struct page *page = NULL; - unsigned long i, npages = ufs_dir_pages(inode); + unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { char *kaddr; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 7caa01652888..fd0203ce1f7f 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -69,11 +69,11 @@ void ufs_free_inode (struct inode * inode) ino = inode->i_ino; - lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) { ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } @@ -81,7 +81,7 @@ void ufs_free_inode (struct inode * inode) bit = ufs_inotocgoff (ino); ucpi = ufs_load_cylinder (sb, cg); if (!ucpi) { - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); return; } ucg = ubh_get_ucg(UCPI_UBH(ucpi)); @@ -115,7 +115,7 @@ void ufs_free_inode (struct inode * inode) ubh_sync_block(UCPI_UBH(ucpi)); ufs_mark_sb_dirty(sb); - unlock_ufs(sb); + mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT\n"); } @@ -193,7 +193,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) sbi = UFS_SB(sb); uspi = sbi->s_uspi; - lock_ufs(sb); + mutex_lock(&sbi->s_lock); /* * Try to place the inode in its parent directory @@ -331,21 +331,21 @@ cg_found: sync_dirty_buffer(bh); brelse(bh); } - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); UFSD("allocating inode %lu\n", inode->i_ino); UFSD("EXIT\n"); return inode; fail_remove_inode: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); clear_nlink(inode); unlock_new_inode(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: - unlock_ufs(sb); + mutex_unlock(&sbi->s_lock); make_bad_inode(inode); iput (inode); UFSD("EXIT (FAILED): err %d\n", err); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 99aaf5c9bf4d..f913a6924b23 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -903,6 +903,9 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) + if (want_delete) { + lock_ufs(inode->i_sb); ufs_free_inode(inode); + unlock_ufs(inode->i_sb); + } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index f773deb1d2e3..47966554317c 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -56,11 +56,9 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi if (dentry->d_name.len > UFS_MAXNAMLEN) return ERR_PTR(-ENAMETOOLONG); - lock_ufs(dir->i_sb); ino = ufs_inode_by_name(dir, &dentry->d_name); if (ino) inode = ufs_iget(dir->i_sb, ino); - unlock_ufs(dir->i_sb); return d_splice_alias(inode, dentry); } @@ -76,24 +74,16 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { struct inode *inode; - int err; - - UFSD("BEGIN\n"); inode = ufs_new_inode(dir, mode); - err = PTR_ERR(inode); + if (IS_ERR(inode)) + return PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ufs_file_inode_operations; - inode->i_fop = &ufs_file_operations; - inode->i_mapping->a_ops = &ufs_aops; - mark_inode_dirty(inode); - lock_ufs(dir->i_sb); - err = ufs_add_nondir(dentry, inode); - unlock_ufs(dir->i_sb); - } - UFSD("END: err=%d\n", err); - return err; + inode->i_op = &ufs_file_inode_operations; + inode->i_fop = &ufs_file_operations; + inode->i_mapping->a_ops = &ufs_aops; + mark_inode_dirty(inode); + return ufs_add_nondir(dentry, inode); } static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) @@ -110,9 +100,7 @@ static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev init_special_inode(inode, mode, rdev); ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev); mark_inode_dirty(inode); - lock_ufs(dir->i_sb); err = ufs_add_nondir(dentry, inode); - unlock_ufs(dir->i_sb); } return err; } @@ -121,19 +109,18 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, const char * symname) { struct super_block * sb = dir->i_sb; - int err = -ENAMETOOLONG; + int err; unsigned l = strlen(symname)+1; struct inode * inode; if (l > sb->s_blocksize) - goto out_notlocked; + return -ENAMETOOLONG; inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_notlocked; + return err; - lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -150,17 +137,13 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, } mark_inode_dirty(inode); - err = ufs_add_nondir(dentry, inode); -out: - unlock_ufs(dir->i_sb); -out_notlocked: - return err; + return ufs_add_nondir(dentry, inode); out_fail: inode_dec_link_count(inode); unlock_new_inode(inode); iput(inode); - goto out; + return err; } static int ufs_link (struct dentry * old_dentry, struct inode * dir, @@ -169,14 +152,16 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, struct inode *inode = d_inode(old_dentry); int error; - lock_ufs(dir->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); ihold(inode); - error = ufs_add_nondir(dentry, inode); - unlock_ufs(dir->i_sb); + error = ufs_add_link(dentry, inode); + if (error) { + inode_dec_link_count(inode); + iput(inode); + } else + d_instantiate(dentry, inode); return error; } @@ -185,9 +170,12 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; + inode_inc_link_count(dir); + inode = ufs_new_inode(dir, S_IFDIR|mode); + err = PTR_ERR(inode); if (IS_ERR(inode)) - return PTR_ERR(inode); + goto out_dir; inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -195,9 +183,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -205,20 +190,19 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) err = ufs_add_link(dentry, inode); if (err) goto out_fail; - unlock_ufs(dir->i_sb); + unlock_new_inode(inode); d_instantiate(dentry, inode); -out: - return err; + return 0; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); unlock_new_inode(inode); iput (inode); +out_dir: inode_dec_link_count(dir); - unlock_ufs(dir->i_sb); - goto out; + return err; } static int ufs_unlink(struct inode *dir, struct dentry *dentry) @@ -248,7 +232,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = d_inode(dentry); int err= -ENOTEMPTY; - lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); if (!err) { @@ -257,7 +240,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) inode_dec_link_count(dir); } } - unlock_ufs(dir->i_sb); return err; } @@ -295,7 +277,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; - ufs_set_link(new_dir, new_de, new_page, old_inode); + ufs_set_link(new_dir, new_de, new_page, old_inode, 1); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) drop_nlink(new_inode); @@ -318,7 +300,12 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, mark_inode_dirty(old_inode); if (dir_de) { - ufs_set_link(old_inode, dir_de, dir_page, new_dir); + if (old_dir != new_dir) + ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0); + else { + kunmap(dir_page); + page_cache_release(dir_page); + } inode_dec_link_count(old_dir); } return 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 098508a93c7b..250579a80d90 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -695,6 +695,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) unsigned flags; lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); UFSD("ENTER\n"); @@ -712,6 +713,7 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_put_cstotal(sb); UFSD("EXIT\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; @@ -800,6 +802,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); mutex_init(&sbi->mutex); + mutex_init(&sbi->s_lock); spin_lock_init(&sbi->work_lock); INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* @@ -1278,6 +1281,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) sync_filesystem(sb); lock_ufs(sb); + mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1291,6 +1295,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } @@ -1298,12 +1303,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { pr_err("ufstype can't be changed during remount\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } @@ -1327,6 +1334,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) */ #ifndef CONFIG_UFS_FS_WRITE pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; #else @@ -1336,11 +1344,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { pr_err("this ufstype is read-only supported\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { pr_err("failed during remounting\n"); + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return -EPERM; } @@ -1348,6 +1358,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + mutex_unlock(&UFS_SB(sb)->s_lock); unlock_ufs(sb); return 0; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 2a07396d5f9e..2e31ea2e35a3 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -30,6 +30,7 @@ struct ufs_sb_info { int work_queued; /* non-zero if the delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct mutex s_lock; }; struct ufs_inode_info { @@ -105,7 +106,7 @@ extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page extern int ufs_empty_dir (struct inode *); extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **); extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, - struct page *page, struct inode *inode); + struct page *page, struct inode *inode, bool update_times); /* file.c */ extern const struct inode_operations ufs_file_inode_operations; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 874507de3485..f0e8249722d4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -577,6 +577,13 @@ restart: if (error) return error; + /* For changing security info in file_remove_privs() we need i_mutex */ + if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) { + xfs_rw_iunlock(ip, *iolock); + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, *iolock); + goto restart; + } /* * If the offset is beyond the size of the file, we need to zero any * blocks that fall between the existing EOF and the start of this @@ -637,7 +644,9 @@ restart: * setgid bits if the process is not being run by root. This keeps * people from modifying setuid and setgid binaries. */ - return file_remove_suid(file); + if (!IS_NOSEC(inode)) + return file_remove_privs(file); + return 0; } /* diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index 273de709495c..b52c0dc4b492 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -51,6 +51,7 @@ #define METHOD_NAME__BBN "_BBN" #define METHOD_NAME__CBA "_CBA" #define METHOD_NAME__CID "_CID" +#define METHOD_NAME__CLS "_CLS" #define METHOD_NAME__CRS "_CRS" #define METHOD_NAME__DDN "_DDN" #define METHOD_NAME__HID "_HID" diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index a8f344363e77..f56de8c5d844 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -294,8 +294,12 @@ /* DEBUG_PRINT functions */ -#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist -#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist +#ifndef COMPILER_VA_MACRO + +#define ACPI_DEBUG_PRINT(plist) acpi_debug_print plist +#define ACPI_DEBUG_PRINT_RAW(plist) acpi_debug_print_raw plist + +#else /* Helper macros for DEBUG_PRINT */ @@ -315,6 +319,11 @@ ACPI_DO_DEBUG_PRINT (acpi_debug_print_raw, level, line, \ filename, modulename, component, __VA_ARGS__) +#define ACPI_DEBUG_PRINT(plist) ACPI_ACTUAL_DEBUG plist +#define ACPI_DEBUG_PRINT_RAW(plist) ACPI_ACTUAL_DEBUG_RAW plist + +#endif + /* * Function entry tracing * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d68f1cd39c49..e8ec18a4a634 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20150515 +#define ACPI_CA_VERSION 0x20150619 #include <acpi/acconfig.h> #include <acpi/actypes.h> @@ -195,9 +195,18 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE); * address. Although ACPICA adheres to the ACPI specification which * requires the use of the corresponding 64-bit address if it is non-zero, * some machines have been found to have a corrupted non-zero 64-bit - * address. Default is TRUE, favor the 32-bit addresses. + * address. Default is FALSE, do not favor the 32-bit addresses. */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, TRUE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_fadt_addresses, FALSE); + +/* + * Optionally use 32-bit FACS table addresses. + * It is reported that some platforms fail to resume from system suspending + * if 64-bit FACS table address is selected: + * https://bugzilla.kernel.org/show_bug.cgi?id=74021 + * Default is TRUE, favor the 32-bit addresses. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use32_bit_facs_addresses, TRUE); /* * Optionally truncate I/O addresses to 16 bits. Provides compatibility @@ -220,6 +229,11 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_auto_repair, FALSE); ACPI_INIT_GLOBAL(u8, acpi_gbl_disable_ssdt_table_install, FALSE); /* + * Optionally enable runtime namespace override. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_runtime_namespace_override, TRUE); + +/* * We keep track of the latest version of Windows that has been requested by * the BIOS. ACPI 5.0. */ @@ -814,8 +828,12 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_leave_sleep_state(u8 sleep_state)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status - acpi_set_firmware_waking_vector(u32 - physical_address)) + acpi_set_firmware_waking_vectors + (acpi_physical_address physical_address, + acpi_physical_address physical_address64)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status + acpi_set_firmware_waking_vector(u32 + physical_address)) #if ACPI_MACHINE_WIDTH == 64 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_set_firmware_waking_vector64(u64 diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index cb8a6b97ceda..2d5faf508cad 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -65,6 +65,7 @@ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ #define ACPI_SIG_FACS "FACS" /* Firmware ACPI Control Structure */ +#define ACPI_SIG_OSDT "OSDT" /* Override System Description Table */ #define ACPI_SIG_PSDT "PSDT" /* Persistent System Description Table */ #define ACPI_SIG_RSDP "RSD PTR " /* Root System Description Pointer */ #define ACPI_SIG_RSDT "RSDT" /* Root System Description Table */ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 06b61f01ea59..fcd570999f35 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -835,6 +835,17 @@ struct acpi_madt_generic_distributor { u8 reserved2[3]; /* reserved - must be zero */ }; +/* Values for Version field above */ + +enum acpi_madt_gic_version { + ACPI_MADT_GIC_VERSION_NONE = 0, + ACPI_MADT_GIC_VERSION_V1 = 1, + ACPI_MADT_GIC_VERSION_V2 = 2, + ACPI_MADT_GIC_VERSION_V3 = 3, + ACPI_MADT_GIC_VERSION_V4 = 4, + ACPI_MADT_GIC_VERSION_RESERVED = 5 /* 5 and greater are reserved */ +}; + /* 13: Generic MSI Frame (ACPI 5.1) */ struct acpi_madt_generic_msi_frame { diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 370d69d871a0..a948fc586b9b 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -51,8 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are defined by third-party specifications, and are - * not defined directly by the ACPI specification itself. + * Generally, the tables in this file are defined by third-party specifications, + * and are not defined directly by the ACPI specification itself. * ******************************************************************************/ @@ -80,6 +80,7 @@ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ #define ACPI_SIG_VRTC "VRTC" /* Virtual Real Time Clock Table */ #define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ @@ -1179,20 +1180,85 @@ enum acpi_spmi_interface_types { /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table - * Version 1 + * Version 2 + * + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 * - * Conforms to "TCG PC Specific Implementation Specification", - * Version 1.1, August 18, 2003 + * NOTE: There are two versions of the table with the same signature -- + * the client version and the server version. * ******************************************************************************/ -struct acpi_table_tcpa { +struct acpi_table_tcpa_client { struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u32 minimum_log_length; /* Minimum length for the event log area */ + u64 log_address; /* Address of the event log area */ +}; + +struct acpi_table_tcpa_server { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ + u64 minimum_log_length; /* Minimum length for the event log area */ u64 log_address; /* Address of the event log area */ + u16 spec_revision; + u8 device_flags; + u8 interrupt_flags; + u8 gpe_number; + u8 reserved2[3]; + u32 global_interrupt; + struct acpi_generic_address address; + u32 reserved3; + struct acpi_generic_address config_address; + u8 group; + u8 bus; /* PCI Bus/Segment/Function numbers */ + u8 device; + u8 function; +}; + +/* Values for device_flags above */ + +#define ACPI_TCPA_PCI_DEVICE (1) +#define ACPI_TCPA_BUS_PNP (1<<1) +#define ACPI_TCPA_ADDRESS_VALID (1<<2) + +/* Values for interrupt_flags above */ + +#define ACPI_TCPA_INTERRUPT_MODE (1) +#define ACPI_TCPA_INTERRUPT_POLARITY (1<<1) +#define ACPI_TCPA_SCI_VIA_GPE (1<<2) +#define ACPI_TCPA_GLOBAL_INTERRUPT (1<<3) + +/******************************************************************************* + * + * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table + * Version 4 + * + * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", + * December 19, 2014 + * + ******************************************************************************/ + +struct acpi_table_tpm2 { + struct acpi_table_header header; /* Common ACPI table header */ + u16 platform_class; + u16 reserved; + u64 control_address; + u32 start_method; + + /* Platform-specific data follows */ }; +/* Values for start_method above */ + +#define ACPI_TPM2_NOT_ALLOWED 0 +#define ACPI_TPM2_START_METHOD 2 +#define ACPI_TPM2_MEMORY_MAPPED 6 +#define ACPI_TPM2_COMMAND_BUFFER 7 +#define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD 8 + /******************************************************************************* * * UEFI - UEFI Boot optimization Table diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 4018986d2a2e..1df891660f43 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -51,7 +51,8 @@ * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * - * The tables in this file are fully defined within the ACPI specification. + * In general, the tables in this file are fully defined within the ACPI + * specification. * ******************************************************************************/ @@ -69,7 +70,6 @@ #define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */ #define ACPI_SIG_RASF "RASF" /* RAS Feature table */ #define ACPI_SIG_STAO "STAO" /* Status Override table */ -#define ACPI_SIG_TPM2 "TPM2" /* Trusted Platform Module 2.0 H/W interface table */ #define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ #define ACPI_SIG_XENV "XENV" /* Xen Environment table */ @@ -722,36 +722,6 @@ struct acpi_table_stao { /******************************************************************************* * - * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table - * Version 3 - * - * Conforms to "TPM 2.0 Hardware Interface Table (TPM2)" 29 November 2011 - * - ******************************************************************************/ - -struct acpi_table_tpm2 { - struct acpi_table_header header; /* Common ACPI table header */ - u32 flags; - u64 control_address; - u32 start_method; -}; - -/* Control area structure (not part of table, pointed to by control_address) */ - -struct acpi_tpm2_control { - u32 reserved; - u32 error; - u32 cancel; - u32 start; - u64 interrupt_control; - u32 command_size; - u64 command_address; - u32 response_size; - u64 response_address; -}; - -/******************************************************************************* - * * WPBT - Windows Platform Environment Table (ACPI 6.0) * Version 1 * diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 63fd7f5e9fb3..c2a41d223162 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -542,14 +542,14 @@ typedef u64 acpi_integer; #define ACPI_COMPARE_NAME(a,b) (*ACPI_CAST_PTR (u32, (a)) == *ACPI_CAST_PTR (u32, (b))) #define ACPI_MOVE_NAME(dest,src) (*ACPI_CAST_PTR (u32, (dest)) = *ACPI_CAST_PTR (u32, (src))) #else -#define ACPI_COMPARE_NAME(a,b) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) -#define ACPI_MOVE_NAME(dest,src) (ACPI_STRNCPY (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) +#define ACPI_COMPARE_NAME(a,b) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_CAST_PTR (char, (b)), ACPI_NAME_SIZE)) +#define ACPI_MOVE_NAME(dest,src) (strncpy (ACPI_CAST_PTR (char, (dest)), ACPI_CAST_PTR (char, (src)), ACPI_NAME_SIZE)) #endif /* Support for the special RSDP signature (8 characters) */ -#define ACPI_VALIDATE_RSDP_SIG(a) (!ACPI_STRNCMP (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) -#define ACPI_MAKE_RSDP_SIG(dest) (ACPI_MEMCPY (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) +#define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) +#define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /******************************************************************************* * @@ -568,6 +568,7 @@ typedef u64 acpi_integer; #define ACPI_NO_ACPI_ENABLE 0x10 #define ACPI_NO_DEVICE_INIT 0x20 #define ACPI_NO_OBJECT_INIT 0x40 +#define ACPI_NO_FACS_INIT 0x80 /* * Initialization state @@ -1140,6 +1141,10 @@ u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported); #define ACPI_UUID_LENGTH 16 +/* Length of 3-byte PCI class code values when converted back to a string */ + +#define ACPI_PCICLS_STRING_SIZE 7 /* Includes null terminator */ + /* Structures used for device/processor HID, UID, CID, and SUB */ struct acpi_pnp_device_id { @@ -1162,7 +1167,7 @@ struct acpi_device_info { u32 name; /* ACPI object Name */ acpi_object_type type; /* ACPI object Type */ u8 param_count; /* If a method, required parameter count */ - u8 valid; /* Indicates which optional fields are valid */ + u16 valid; /* Indicates which optional fields are valid */ u8 flags; /* Miscellaneous info */ u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ @@ -1171,6 +1176,7 @@ struct acpi_device_info { struct acpi_pnp_device_id hardware_id; /* _HID value */ struct acpi_pnp_device_id unique_id; /* _UID value */ struct acpi_pnp_device_id subsystem_id; /* _SUB value */ + struct acpi_pnp_device_id class_code; /* _CLS value */ struct acpi_pnp_device_id_list compatible_id_list; /* _CID list <must be last> */ }; @@ -1180,14 +1186,15 @@ struct acpi_device_info { /* Flags for Valid field above (acpi_get_object_info) */ -#define ACPI_VALID_STA 0x01 -#define ACPI_VALID_ADR 0x02 -#define ACPI_VALID_HID 0x04 -#define ACPI_VALID_UID 0x08 -#define ACPI_VALID_SUB 0x10 -#define ACPI_VALID_CID 0x20 -#define ACPI_VALID_SXDS 0x40 -#define ACPI_VALID_SXWS 0x80 +#define ACPI_VALID_STA 0x0001 +#define ACPI_VALID_ADR 0x0002 +#define ACPI_VALID_HID 0x0004 +#define ACPI_VALID_UID 0x0008 +#define ACPI_VALID_SUB 0x0010 +#define ACPI_VALID_CID 0x0020 +#define ACPI_VALID_CLS 0x0040 +#define ACPI_VALID_SXDS 0x0100 +#define ACPI_VALID_SXWS 0x0200 /* Flags for _STA return value (current_status above) */ diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 073997d729e9..3cedd43943f4 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -346,29 +346,6 @@ /* We will be linking to the standard Clib functions */ -#define ACPI_STRSTR(s1,s2) strstr((s1), (s2)) -#define ACPI_STRCHR(s1,c) strchr((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) strlen((s)) -#define ACPI_STRCPY(d,s) (void) strcpy((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) strncpy((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) strncmp((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) strcmp((d), (s)) -#define ACPI_STRCAT(d,s) (void) strcat((d), (s)) -#define ACPI_STRNCAT(d,s,n) strncat((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) strtoul((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) memcpy((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,s,n) (void) memset((d), (s), (acpi_size)(n)) - -#define ACPI_TOUPPER(i) toupper((int) (i)) -#define ACPI_TOLOWER(i) tolower((int) (i)) -#define ACPI_IS_XDIGIT(i) isxdigit((int) (i)) -#define ACPI_IS_DIGIT(i) isdigit((int) (i)) -#define ACPI_IS_SPACE(i) isspace((int) (i)) -#define ACPI_IS_UPPER(i) isupper((int) (i)) -#define ACPI_IS_PRINT(i) isprint((int) (i)) -#define ACPI_IS_ALPHA(i) isalpha((int) (i)) - #else /****************************************************************************** @@ -406,22 +383,6 @@ typedef char *va_list; /* Use the local (ACPICA) definitions of the clib functions */ -#define ACPI_STRSTR(s1,s2) acpi_ut_strstr ((s1), (s2)) -#define ACPI_STRCHR(s1,c) acpi_ut_strchr ((s1), (c)) -#define ACPI_STRLEN(s) (acpi_size) acpi_ut_strlen ((s)) -#define ACPI_STRCPY(d,s) (void) acpi_ut_strcpy ((d), (s)) -#define ACPI_STRNCPY(d,s,n) (void) acpi_ut_strncpy ((d), (s), (acpi_size)(n)) -#define ACPI_STRNCMP(d,s,n) acpi_ut_strncmp ((d), (s), (acpi_size)(n)) -#define ACPI_STRCMP(d,s) acpi_ut_strcmp ((d), (s)) -#define ACPI_STRCAT(d,s) (void) acpi_ut_strcat ((d), (s)) -#define ACPI_STRNCAT(d,s,n) acpi_ut_strncat ((d), (s), (acpi_size)(n)) -#define ACPI_STRTOUL(d,s,n) acpi_ut_strtoul ((d), (s), (acpi_size)(n)) -#define ACPI_MEMCMP(s1,s2,n) acpi_ut_memcmp((const char *)(s1), (const char *)(s2), (acpi_size)(n)) -#define ACPI_MEMCPY(d,s,n) (void) acpi_ut_memcpy ((d), (s), (acpi_size)(n)) -#define ACPI_MEMSET(d,v,n) (void) acpi_ut_memset ((d), (v), (acpi_size)(n)) -#define ACPI_TOUPPER(c) acpi_ut_to_upper ((int) (c)) -#define ACPI_TOLOWER(c) acpi_ut_to_lower ((int) (c)) - #endif /* ACPI_USE_SYSTEM_CLIBRARY */ #ifndef ACPI_FILE diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h index 14dc6f68ca18..0a7dc8e583b1 100644 --- a/include/acpi/platform/acenvex.h +++ b/include/acpi/platform/acenvex.h @@ -56,6 +56,12 @@ #if defined(_LINUX) || defined(__linux__) #include <acpi/platform/aclinuxex.h> +#elif defined(_AED_EFI) +#include "acefiex.h" + +#elif defined(_GNU_EFI) +#include "acefiex.h" + #elif defined(__DragonFly__) #include "acdragonflyex.h" diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index f54de0a63558..5457a06cb528 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -75,4 +75,8 @@ #undef strchr #endif +/* GCC supports __VA_ARGS__ in macros */ + +#define COMPILER_VA_MACRO 1 + #endif /* __ACGCC_H__ */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..d2445fa9999f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -309,9 +322,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -446,6 +456,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; @@ -507,13 +518,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a48d90e3bcbb..a23209b43842 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -50,10 +50,10 @@ enum wb_stat_item { */ struct bdi_writeback_congested { unsigned long state; /* WB_[a]sync_congested flags */ + atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK struct backing_dev_info *bdi; /* the associated bdi */ - atomic_t refcnt; /* nr of attached wb's and blkg */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif @@ -150,11 +150,12 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ - struct bdi_writeback_congested wb_congested; /* its congested state */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ +#else + struct bdi_writeback_congested *wb_congested; #endif wait_queue_head_t wb_waitq; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0e6d4828a77a..0fe9df983ab7 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -15,6 +15,7 @@ #include <linux/writeback.h> #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> +#include <linux/slab.h> int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode) static inline struct bdi_writeback_congested * wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) { - return bdi->wb.congested; + atomic_inc(&bdi->wb_congested->refcnt); + return bdi->wb_congested; } static inline void wb_congested_put(struct bdi_writeback_congested *congested) { + if (atomic_dec_and_test(&congested->refcnt)) + kfree(congested); } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,13 +93,9 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) { - BUG_ON(time_after(b, a)); - return (long)a - (long)b; + return timeout ?: MAX_SCHEDULE_TIMEOUT; } struct ceph_mds_client; @@ -178,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include <linux/radix-tree.h> #include <linux/uio.h> #include <linux/workqueue.h> +#include <net/net_namespace.h> #include <linux/ceph/types.h> #include <linux/ceph/buffer.h> @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7f8ad9593da7..e08a6ae7c0a4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index cf53d0773ce3..d81961e9e37d 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -9,5 +9,6 @@ extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); extern __u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); #endif diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include <linux/types.h> +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include <linux/crush/crush.h> +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; @@ -505,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } +static inline int simple_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct { * read mostly part */ atomic_t count; + bool resize_in_progress; + wait_queue_head_t resize_wait; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f1a84635da8..a0653e560c26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1654,7 +1654,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! */ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -1917,6 +1916,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -2004,7 +2004,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); @@ -2213,7 +2212,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); @@ -2530,6 +2528,8 @@ extern struct file * open_exec(const char *); extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); +extern char *file_path(struct file *, char *, int); + #include <linux/err.h> /* needed for stackable file system support */ @@ -2581,7 +2581,12 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); +extern int file_remove_privs(struct file *); +extern int dentry_needs_remove_privs(struct dentry *dentry); +static inline int file_needs_remove_privs(struct file *file) +{ + return dentry_needs_remove_privs(file->f_path.dentry); +} extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) @@ -2816,6 +2821,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 771484993ca7..604e1526cd00 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ extern wait_queue_head_t fscache_cache_cleared_wq; */ typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); enum fscache_operation_state { FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ @@ -109,6 +110,9 @@ struct fscache_operation { * the op in a non-pool thread */ fscache_operation_processor_t processor; + /* Operation cancellation cleanup (optional) */ + fscache_operation_cancel_t cancel; + /* operation releaser */ fscache_operation_release_t release; }; @@ -119,33 +123,17 @@ extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); - -/** - * fscache_operation_init - Do basic initialisation of an operation - * @op: The operation to initialise - * @release: The release function to assign - * - * Do basic initialisation of an operation. The caller must still set flags, - * object and processor if needed. - */ -static inline void fscache_operation_init(struct fscache_operation *op, - fscache_operation_processor_t processor, - fscache_operation_release_t release) -{ - INIT_WORK(&op->work, fscache_op_work_func); - atomic_set(&op->usage, 1); - op->state = FSCACHE_OP_ST_INITIALISED; - op->debug_id = atomic_inc_return(&fscache_op_debug_id); - op->processor = processor; - op->release = release; - INIT_LIST_HEAD(&op->pend_link); -} +extern void fscache_operation_init(struct fscache_operation *, + fscache_operation_processor_t, + fscache_operation_cancel_t, + fscache_operation_release_t); /* * data read operation */ struct fscache_retrieval { struct fscache_operation op; + struct fscache_cookie *cookie; /* The netfs cookie */ struct address_space *mapping; /* netfs pages */ fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ void *context; /* netfs read context (pinned) */ @@ -371,6 +359,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ +#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ @@ -410,17 +399,16 @@ static inline bool fscache_object_is_available(struct fscache_object *object) return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); } -static inline bool fscache_object_is_active(struct fscache_object *object) +static inline bool fscache_cache_is_broken(struct fscache_object *object) { - return fscache_object_is_available(object) && - fscache_object_is_live(object) && - !test_bit(FSCACHE_IOERROR, &object->cache->flags); + return test_bit(FSCACHE_IOERROR, &object->cache->flags); } -static inline bool fscache_object_is_dead(struct fscache_object *object) +static inline bool fscache_object_is_active(struct fscache_object *object) { - return fscache_object_is_dying(object) && - test_bit(FSCACHE_IOERROR, &object->cache->flags); + return fscache_object_is_available(object) && + fscache_object_is_live(object) && + !fscache_cache_is_broken(object); } /** @@ -551,4 +539,15 @@ extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, const void *data, uint16_t datalen); +extern void fscache_object_retrying_stale(struct fscache_object *object); + +enum fscache_why_object_killed { + FSCACHE_OBJECT_IS_STALE, + FSCACHE_OBJECT_NO_SPACE, + FSCACHE_OBJECT_WAS_RETIRED, + FSCACHE_OBJECT_WAS_CULLED, +}; +extern void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + #endif /* _LINUX_FSCACHE_CACHE_H */ diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index 3343298e40e8..859d673d98c8 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -26,6 +26,7 @@ #define HWLOCK_IRQ 0x02 /* Disable interrupts, don't save state */ struct device; +struct device_node; struct hwspinlock; struct hwspinlock_device; struct hwspinlock_ops; @@ -66,6 +67,7 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank); struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); +int of_hwspin_lock_get_id(struct device_node *np, int index); int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); @@ -120,6 +122,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + return 0; +} + static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) { return 0; diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h index 08a5ef6e8f25..eecc9ea6cd58 100644 --- a/include/linux/input/touchscreen.h +++ b/include/linux/input/touchscreen.h @@ -12,9 +12,10 @@ #include <linux/input.h> #ifdef CONFIG_OF -void touchscreen_parse_of_params(struct input_dev *dev); +void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch); #else -static inline void touchscreen_parse_of_params(struct input_dev *dev) +static inline void touchscreen_parse_of_params(struct input_dev *dev, + bool multitouch) { } #endif diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 14d79131f53d..638887376e58 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,20 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include <linux/of.h> + +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their DT compatible string and their + * initialization function. + * + * @name: name that must be unique accross all IRQCHIP_DECLARE of the + * same file. + * @compstr: compatible string of the irqchip driver + * @fn: initialization function + */ +#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index e6b2f7db9c0c..123be25ea15a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -286,6 +287,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; @@ -984,17 +1027,14 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; @@ -1142,12 +1182,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 9ac1a62fc6f5..b02f72bb8e32 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -4,15 +4,20 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * * BSD LICENSE * - * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,49 +45,940 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Linux driver * * Contact Information: - * Jon Mason <jon.mason@intel.com> + * Allen Hubbe <Allen.Hubbe@emc.com> */ -struct ntb_transport_qp; +#ifndef _NTB_H_ +#define _NTB_H_ -struct ntb_client { - struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); +#include <linux/completion.h> +#include <linux/device.h> + +struct ntb_client; +struct ntb_dev; +struct pci_dev; + +/** + * enum ntb_topo - NTB connection topology + * @NTB_TOPO_NONE: Topology is unknown or invalid. + * @NTB_TOPO_PRI: On primary side of local ntb. + * @NTB_TOPO_SEC: On secondary side of remote ntb. + * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. + * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. + */ +enum ntb_topo { + NTB_TOPO_NONE = -1, + NTB_TOPO_PRI, + NTB_TOPO_SEC, + NTB_TOPO_B2B_USD, + NTB_TOPO_B2B_DSD, +}; + +static inline int ntb_topo_is_b2b(enum ntb_topo topo) +{ + switch ((int)topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + return 1; + } + return 0; +} + +static inline char *ntb_topo_string(enum ntb_topo topo) +{ + switch (topo) { + case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; + case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; + case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; + case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; + case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + } + return "NTB_TOPO_INVALID"; +} + +/** + * enum ntb_speed - NTB link training speed + * @NTB_SPEED_AUTO: Request the max supported speed. + * @NTB_SPEED_NONE: Link is not trained to any speed. + * @NTB_SPEED_GEN1: Link is trained to gen1 speed. + * @NTB_SPEED_GEN2: Link is trained to gen2 speed. + * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + */ +enum ntb_speed { + NTB_SPEED_AUTO = -1, + NTB_SPEED_NONE = 0, + NTB_SPEED_GEN1 = 1, + NTB_SPEED_GEN2 = 2, + NTB_SPEED_GEN3 = 3, +}; + +/** + * enum ntb_width - NTB link training width + * @NTB_WIDTH_AUTO: Request the max supported width. + * @NTB_WIDTH_NONE: Link is not trained to any width. + * @NTB_WIDTH_1: Link is trained to 1 lane width. + * @NTB_WIDTH_2: Link is trained to 2 lane width. + * @NTB_WIDTH_4: Link is trained to 4 lane width. + * @NTB_WIDTH_8: Link is trained to 8 lane width. + * @NTB_WIDTH_12: Link is trained to 12 lane width. + * @NTB_WIDTH_16: Link is trained to 16 lane width. + * @NTB_WIDTH_32: Link is trained to 32 lane width. + */ +enum ntb_width { + NTB_WIDTH_AUTO = -1, + NTB_WIDTH_NONE = 0, + NTB_WIDTH_1 = 1, + NTB_WIDTH_2 = 2, + NTB_WIDTH_4 = 4, + NTB_WIDTH_8 = 8, + NTB_WIDTH_12 = 12, + NTB_WIDTH_16 = 16, + NTB_WIDTH_32 = 32, +}; + +/** + * struct ntb_client_ops - ntb client operations + * @probe: Notify client of a new device. + * @remove: Notify client to remove a device. + */ +struct ntb_client_ops { + int (*probe)(struct ntb_client *client, struct ntb_dev *ntb); + void (*remove)(struct ntb_client *client, struct ntb_dev *ntb); +}; + +static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->probe && + ops->remove && + 1; +} + +/** + * struct ntb_ctx_ops - ntb driver context operations + * @link_event: See ntb_link_event(). + * @db_event: See ntb_db_event(). + */ +struct ntb_ctx_ops { + void (*link_event)(void *ctx); + void (*db_event)(void *ctx, int db_vector); +}; + +static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) +{ + /* commented callbacks are not required: */ + return + /* ops->link_event && */ + /* ops->db_event && */ + 1; +} + +/** + * struct ntb_ctx_ops - ntb device operations + * @mw_count: See ntb_mw_count(). + * @mw_get_range: See ntb_mw_get_range(). + * @mw_set_trans: See ntb_mw_set_trans(). + * @mw_clear_trans: See ntb_mw_clear_trans(). + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). + * @db_is_unsafe: See ntb_db_is_unsafe(). + * @db_valid_mask: See ntb_db_valid_mask(). + * @db_vector_count: See ntb_db_vector_count(). + * @db_vector_mask: See ntb_db_vector_mask(). + * @db_read: See ntb_db_read(). + * @db_set: See ntb_db_set(). + * @db_clear: See ntb_db_clear(). + * @db_read_mask: See ntb_db_read_mask(). + * @db_set_mask: See ntb_db_set_mask(). + * @db_clear_mask: See ntb_db_clear_mask(). + * @peer_db_addr: See ntb_peer_db_addr(). + * @peer_db_read: See ntb_peer_db_read(). + * @peer_db_set: See ntb_peer_db_set(). + * @peer_db_clear: See ntb_peer_db_clear(). + * @peer_db_read_mask: See ntb_peer_db_read_mask(). + * @peer_db_set_mask: See ntb_peer_db_set_mask(). + * @peer_db_clear_mask: See ntb_peer_db_clear_mask(). + * @spad_is_unsafe: See ntb_spad_is_unsafe(). + * @spad_count: See ntb_spad_count(). + * @spad_read: See ntb_spad_read(). + * @spad_write: See ntb_spad_write(). + * @peer_spad_addr: See ntb_peer_spad_addr(). + * @peer_spad_read: See ntb_peer_spad_read(). + * @peer_spad_write: See ntb_peer_spad_write(). + */ +struct ntb_dev_ops { + int (*mw_count)(struct ntb_dev *ntb); + int (*mw_get_range)(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size); + int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size); + int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + + int (*db_is_unsafe)(struct ntb_dev *ntb); + u64 (*db_valid_mask)(struct ntb_dev *ntb); + int (*db_vector_count)(struct ntb_dev *ntb); + u64 (*db_vector_mask)(struct ntb_dev *ntb, int db_vector); + + u64 (*db_read)(struct ntb_dev *ntb); + int (*db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*db_read_mask)(struct ntb_dev *ntb); + int (*db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*peer_db_addr)(struct ntb_dev *ntb, + phys_addr_t *db_addr, resource_size_t *db_size); + u64 (*peer_db_read)(struct ntb_dev *ntb); + int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*peer_db_read_mask)(struct ntb_dev *ntb); + int (*peer_db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*spad_is_unsafe)(struct ntb_dev *ntb); + int (*spad_count)(struct ntb_dev *ntb); + + u32 (*spad_read)(struct ntb_dev *ntb, int idx); + int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + + int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); + int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); }; -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, +static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->mw_count && + ops->mw_get_range && + ops->mw_set_trans && + /* ops->mw_clear_trans && */ + ops->link_is_up && + ops->link_enable && + ops->link_disable && + /* ops->db_is_unsafe && */ + ops->db_valid_mask && + + /* both set, or both unset */ + (!ops->db_vector_count == !ops->db_vector_mask) && + + ops->db_read && + /* ops->db_set && */ + ops->db_clear && + /* ops->db_read_mask && */ + ops->db_set_mask && + ops->db_clear_mask && + ops->peer_db_addr && + /* ops->peer_db_read && */ + ops->peer_db_set && + /* ops->peer_db_clear && */ + /* ops->peer_db_read_mask && */ + /* ops->peer_db_set_mask && */ + /* ops->peer_db_clear_mask && */ + /* ops->spad_is_unsafe && */ + ops->spad_count && + ops->spad_read && + ops->spad_write && + ops->peer_spad_addr && + /* ops->peer_spad_read && */ + ops->peer_spad_write && + 1; +} + +/** + * struct ntb_client - client interested in ntb devices + * @drv: Linux driver object. + * @ops: See &ntb_client_ops. + */ +struct ntb_client { + struct device_driver drv; + const struct ntb_client_ops ops; }; -int ntb_register_client(struct ntb_client *drvr); -void ntb_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); - -struct ntb_queue_handlers { - void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler)(void *data, int status); +#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) + +/** + * struct ntb_device - ntb device + * @dev: Linux device object. + * @pdev: Pci device entry of the ntb. + * @topo: Detected topology of the ntb. + * @ops: See &ntb_dev_ops. + * @ctx: See &ntb_ctx_ops. + * @ctx_ops: See &ntb_ctx_ops. + */ +struct ntb_dev { + struct device dev; + struct pci_dev *pdev; + enum ntb_topo topo; + const struct ntb_dev_ops *ops; + void *ctx; + const struct ntb_ctx_ops *ctx_ops; + + /* private: */ + + /* synchronize setting, clearing, and calling ctx_ops */ + spinlock_t ctx_lock; + /* block unregister until device is fully released */ + struct completion released; }; -unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); -unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); -struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, - const struct ntb_queue_handlers *handlers); -void ntb_transport_free_queue(struct ntb_transport_qp *qp); -int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); -void ntb_transport_link_up(struct ntb_transport_qp *qp); -void ntb_transport_link_down(struct ntb_transport_qp *qp); -bool ntb_transport_link_query(struct ntb_transport_qp *qp); +#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) + +/** + * ntb_register_client() - register a client for interest in ntb devices + * @client: Client context. + * + * The client will be added to the list of clients interested in ntb devices. + * The client will be notified of any ntb devices that are not already + * associated with a client, or if ntb devices are registered later. + * + * Return: Zero if the client is registered, otherwise an error number. + */ +#define ntb_register_client(client) \ + __ntb_register_client((client), THIS_MODULE, KBUILD_MODNAME) + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name); + +/** + * ntb_unregister_client() - unregister a client for interest in ntb devices + * @client: Client context. + * + * The client will be removed from the list of clients interested in ntb + * devices. If any ntb devices are associated with the client, the client will + * be notified to remove those devices. + */ +void ntb_unregister_client(struct ntb_client *client); + +#define module_ntb_client(__ntb_client) \ + module_driver(__ntb_client, ntb_register_client, \ + ntb_unregister_client) + +/** + * ntb_register_device() - register a ntb device + * @ntb: NTB device context. + * + * The device will be added to the list of ntb devices. If any clients are + * interested in ntb devices, each client will be notified of the ntb device, + * until at most one client accepts the device. + * + * Return: Zero if the device is registered, otherwise an error number. + */ +int ntb_register_device(struct ntb_dev *ntb); + +/** + * ntb_register_device() - unregister a ntb device + * @ntb: NTB device context. + * + * The device will be removed from the list of ntb devices. If the ntb device + * is associated with a client, the client will be notified to remove the + * device. + */ +void ntb_unregister_device(struct ntb_dev *ntb); + +/** + * ntb_set_ctx() - associate a driver context with an ntb device + * @ntb: NTB device context. + * @ctx: Driver context. + * @ctx_ops: Driver context operations. + * + * Associate a driver context and operations with a ntb device. The context is + * provided by the client driver, and the driver may associate a different + * context with each ntb device. + * + * Return: Zero if the context is associated, otherwise an error number. + */ +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops); + +/** + * ntb_clear_ctx() - disassociate any driver context from an ntb device + * @ntb: NTB device context. + * + * Clear any association that may exist between a driver context and the ntb + * device. + */ +void ntb_clear_ctx(struct ntb_dev *ntb); + +/** + * ntb_link_event() - notify driver context of a change in link status + * @ntb: NTB device context. + * + * Notify the driver context that the link status may have changed. The driver + * should call ntb_link_is_up() to get the current status. + */ +void ntb_link_event(struct ntb_dev *ntb); + +/** + * ntb_db_event() - notify driver context of a doorbell event + * @ntb: NTB device context. + * @vector: Interrupt vector number. + * + * Notify the driver context of a doorbell event. If hardware supports + * multiple interrupt vectors for doorbells, the vector number indicates which + * vector received the interrupt. The vector number is relative to the first + * vector used for doorbells, starting at zero, and must be less than + ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * doorbell bits need service, and ntb_db_vector_mask() to determine which of + * those bits are associated with the vector number. + */ +void ntb_db_event(struct ntb_dev *ntb, int vector); + +/** + * ntb_mw_count() - get the number of memory windows + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * + * Return: the number of memory windows. + */ +static inline int ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->mw_count(ntb); +} + +/** + * ntb_mw_get_range() - get the range of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @base: OUT - the base address for mapping the memory window + * @size: OUT - the size for mapping the memory window + * @align: OUT - the base alignment for translating the memory window + * @align_size: OUT - the size alignment for translating the memory window + * + * Get the range of a memory window. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. The alignment and + * size may be used for translating the memory window, for the peer to access + * memory on the local system. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size) +{ + return ntb->ops->mw_get_range(ntb, idx, base, size, + align, align_size); +} + +/** + * ntb_mw_set_trans() - set the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @addr: The dma address local memory to expose to the peer. + * @size: The size of the local memory to expose to the peer. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + return ntb->ops->mw_set_trans(ntb, idx, addr, size); +} + +/** + * ntb_mw_clear_trans() - clear the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * + * Clear the translation of a memory window. The peer may no longer access + * local memory through the window. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +{ + if (!ntb->ops->mw_clear_trans) + return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, idx); +} + +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. + * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + +/** + * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using doorbells. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use doorbells, or One if it is not safe. + */ +static inline int ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_is_unsafe) + return 0; + + return ntb->ops->db_is_unsafe(ntb); +} + +/** + * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb + * @ntb: NTB device context. + * + * Hardware may support different number or arrangement of doorbell bits. + * + * Return: A mask of doorbell bits supported by the ntb. + */ +static inline u64 ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb->ops->db_valid_mask(ntb); +} + +/** + * ntb_db_vector_count() - get the number of doorbell interrupt vectors + * @ntb: NTB device context. + * + * Hardware may support different number of interrupt vectors. + * + * Return: The number of doorbell interrupt vectors. + */ +static inline int ntb_db_vector_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_vector_count) + return 1; + + return ntb->ops->db_vector_count(ntb); +} + +/** + * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector + * @ntb: NTB device context. + * @vector: Doorbell vector number. + * + * Each interrupt vector may have a different number or arrangement of bits. + * + * Return: A mask of doorbell bits serviced by a vector. + */ +static inline u64 ntb_db_vector_mask(struct ntb_dev *ntb, int vector) +{ + if (!ntb->ops->db_vector_mask) + return ntb_db_valid_mask(ntb); + + return ntb->ops->db_vector_mask(ntb, vector); +} + +/** + * ntb_db_read() - read the local doorbell register + * @ntb: NTB device context. + * + * Read the local doorbell register, and return the bits that are set. + * + * Return: The bits currently set in the local doorbell register. + */ +static inline u64 ntb_db_read(struct ntb_dev *ntb) +{ + return ntb->ops->db_read(ntb); +} + +/** + * ntb_db_set() - set bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the local doorbell register, which may generate a local doorbell + * interrupt. Bits that were already set must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set) + return -EINVAL; + + return ntb->ops->db_set(ntb, db_bits); +} + +/** + * ntb_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell register, arming the bits for the next + * doorbell. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear(ntb, db_bits); +} + +/** + * ntb_db_read_mask() - read the local doorbell mask + * @ntb: NTB device context. + * + * Read the local doorbell mask register, and return the bits that are set. + * + * This is unusual, though hardware is likely to support it. + * + * Return: The bits currently set in the local doorbell mask register. + */ +static inline u64 ntb_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->db_read_mask(ntb); +} + +/** + * ntb_db_set_mask() - set bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the local doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_set_mask(ntb, db_bits); +} + +/** + * ntb_db_clear_mask() - clear bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If a doorbell bit is already + * set at the time the mask is cleared, and the corresponding mask bit is + * changed from set to clear, then the ntb driver must ensure that + * ntb_db_event() is called. If the hardware does not generate the interrupt + * on clearing the mask bit, then the driver must call ntb_db_event() anyway. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_addr() - address and size of the peer doorbell register + * @ntb: NTB device context. + * @db_addr: OUT - The address of the peer doorbell register. + * @db_size: OUT - The number of bytes to write the peer doorbell register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * The drivers may wish to ring the peer doorbell at the completion of memory + * copy operations. For efficiency, and to simplify ordering of operations + * between the dma memory copies and the ringing doorbell, the driver may + * append one additional dma memory copy with the doorbell register as the + * destination, after the memory copy operations. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + return ntb->ops->peer_db_addr(ntb, db_addr, db_size); +} + +/** + * ntb_peer_db_read() - read the peer doorbell register + * @ntb: NTB device context. + * + * Read the peer doorbell register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell register. + */ +static inline u64 ntb_peer_db_read(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_db_read) + return 0; + + return ntb->ops->peer_db_read(ntb); +} + +/** + * ntb_peer_db_set() - set bits in the peer doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the peer doorbell register, which may generate a peer doorbell + * interrupt. Bits that were already set must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->peer_db_set(ntb, db_bits); +} + +/** + * ntb_peer_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell register, arming the bits for the next + * doorbell. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear) + return -EINVAL; + + return ntb->ops->peer_db_clear(ntb, db_bits); +} + +/** + * ntb_peer_db_read_mask() - read the peer doorbell mask + * @ntb: NTB device context. + * + * Read the peer doorbell mask register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell mask register. + */ +static inline u64 ntb_peer_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->peer_db_read_mask(ntb); +} + +/** + * ntb_peer_db_set_mask() - set bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the peer doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set_mask) + return -EINVAL; + + return ntb->ops->peer_db_set_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_clear_mask() - clear bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If the hardware does not + * generate the interrupt on clearing the mask bit, then the driver should not + * implement this function! + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear_mask) + return -EINVAL; + + return ntb->ops->peer_db_clear_mask(ntb, db_bits); +} + +/** + * ntb_spad_is_unsafe() - check if it is safe to use the hardware scratchpads + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using scratchpads. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use scratchpads, or One if it is not safe. + */ +static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->spad_is_unsafe) + return 0; + + return ntb->ops->spad_is_unsafe(ntb); +} + +/** + * ntb_mw_count() - get the number of scratchpads + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of scratchpads. + * + * Return: the number of scratchpads. + */ +static inline int ntb_spad_count(struct ntb_dev *ntb) +{ + return ntb->ops->spad_count(ntb); +} + +/** + * ntb_spad_read() - read the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the local scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->spad_read(ntb, idx); +} + +/** + * ntb_spad_write() - write the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the local scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->spad_write(ntb, idx, val); +} + +/** + * ntb_peer_spad_addr() - address of the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @spad_addr: OUT - The address of the peer scratchpad register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); +} + +/** + * ntb_peer_spad_read() - read the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the peer scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->peer_spad_read(ntb, idx); +} + +/** + * ntb_peer_spad_write() - write the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the peer scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->peer_spad_write(ntb, idx, val); +} + +#endif diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h new file mode 100644 index 000000000000..2862861366a5 --- /dev/null +++ b/include/linux/ntb_transport.h @@ -0,0 +1,85 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * BSD LICENSE + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Transport Linux driver + * + * Contact Information: + * Jon Mason <jon.mason@intel.com> + */ + +struct ntb_transport_qp; + +struct ntb_transport_client { + struct device_driver driver; + int (*probe)(struct device *client_dev); + void (*remove)(struct device *client_dev); +}; + +int ntb_transport_register_client(struct ntb_transport_client *drvr); +void ntb_transport_unregister_client(struct ntb_transport_client *drvr); +int ntb_transport_register_client_dev(char *device_name); +void ntb_transport_unregister_client_dev(char *device_name); + +struct ntb_queue_handlers { + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, int status); +}; + +unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); +unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); +struct ntb_transport_qp * +ntb_transport_create_queue(void *data, struct device *client_dev, + const struct ntb_queue_handlers *handlers); +void ntb_transport_free_queue(struct ntb_transport_qp *qp); +int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); +void ntb_transport_link_up(struct ntb_transport_qp *qp); +void ntb_transport_link_down(struct ntb_transport_qp *qp); +bool ntb_transport_link_query(struct ntb_transport_qp *qp); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index fb0814ca65c7..a6c78e00ea96 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -671,4 +671,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +static inline unsigned long dir_pages(struct inode *inode) +{ + return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; +} + #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/platform_data/wkup_m3.h b/include/linux/platform_data/wkup_m3.h new file mode 100644 index 000000000000..3f1d77effd71 --- /dev/null +++ b/include/linux/platform_data/wkup_m3.h @@ -0,0 +1,30 @@ +/* + * TI Wakeup M3 remote processor platform data + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach <d-gerlach@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_PLATFORM_DATA_WKUP_M3_H +#define _LINUX_PLATFORM_DATA_WKUP_M3_H + +struct platform_device; + +struct wkup_m3_platform_data { + const char *reset_name; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); +}; + +#endif /* _LINUX_PLATFORM_DATA_WKUP_M3_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 0f1534acaf60..84991f185173 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -293,6 +293,8 @@ struct preempt_notifier { struct preempt_ops *ops; }; +void preempt_notifier_inc(void); +void preempt_notifier_dec(void); void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 78b8a9b9d40a..9c4e1384f636 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -36,11 +36,11 @@ #define REMOTEPROC_H #include <linux/types.h> -#include <linux/klist.h> #include <linux/mutex.h> #include <linux/virtio.h> #include <linux/completion.h> #include <linux/idr.h> +#include <linux/of.h> /** * struct resource_table - firmware resource table header @@ -330,11 +330,13 @@ struct rproc; * @start: power on the device and boot it * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) + * @da_to_va: optional platform hook to perform address translations */ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); + void * (*da_to_va)(struct rproc *rproc, u64 da, int len); }; /** @@ -375,7 +377,7 @@ enum rproc_crash_type { /** * struct rproc - represents a physical remote processor device - * @node: klist node of this rproc object + * @node: list node of this rproc object * @domain: iommu domain * @name: human readable name of the rproc * @firmware: name of firmware file to be loaded @@ -407,7 +409,7 @@ enum rproc_crash_type { * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { - struct klist_node node; + struct list_head node; struct iommu_domain *domain; const char *name; const char *firmware; @@ -481,6 +483,7 @@ struct rproc_vdev { u32 rsc_offset; }; +struct rproc *rproc_get_by_phandle(phandle phandle); struct rproc *rproc_alloc(struct device *dev, const char *name, const struct rproc_ops *ops, const char *firmware, int len); diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h index 2c92e1c8e055..aefd997262e4 100644 --- a/include/linux/rtc/sirfsoc_rtciobrg.h +++ b/include/linux/rtc/sirfsoc_rtciobrg.h @@ -9,10 +9,14 @@ #ifndef _SIRFSOC_RTC_IOBRG_H_ #define _SIRFSOC_RTC_IOBRG_H_ +struct regmap_config; + extern void sirfsoc_rtc_iobrg_besyncing(void); extern u32 sirfsoc_rtc_iobrg_readl(u32 addr); extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr); +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config); #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 8aa4a251742f..ae21f1591615 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -192,8 +192,6 @@ struct task_group; #ifdef CONFIG_SCHED_DEBUG extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #endif /* @@ -838,7 +836,7 @@ extern struct user_struct root_user; struct backing_dev_info; struct reclaim_state; -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ @@ -848,7 +846,7 @@ struct sched_info { unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ }; -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ +#endif /* CONFIG_SCHED_INFO */ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { @@ -1397,7 +1395,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; #endif diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c77..912a7c482649 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,7 @@ __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); +int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); @@ -269,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,9 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -180,7 +183,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -212,7 +215,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated @@ -241,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -327,6 +332,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -345,7 +362,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); @@ -431,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea5697..fa7bc29925c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17e..9f65758311a4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,10 +67,13 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} #endif static inline void tick_broadcast_enable(void) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h index 51865d05b267..ce63a2c3a612 100644 --- a/include/linux/virtio_byteorder.h +++ b/include/linux/virtio_byteorder.h @@ -3,17 +3,21 @@ #include <linux/types.h> #include <uapi/linux/virtio_types.h> -/* - * Low-level memory accessors for handling virtio in modern little endian and in - * compatibility native endian format. - */ +static inline bool virtio_legacy_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) { if (little_endian) return le16_to_cpu((__force __le16)val); else - return (__force u16)val; + return be16_to_cpu((__force __be16)val); } static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) if (little_endian) return (__force __virtio16)cpu_to_le16(val); else - return (__force __virtio16)val; + return (__force __virtio16)cpu_to_be16(val); } static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) if (little_endian) return le32_to_cpu((__force __le32)val); else - return (__force u32)val; + return be32_to_cpu((__force __be32)val); } static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) if (little_endian) return (__force __virtio32)cpu_to_le32(val); else - return (__force __virtio32)val; + return (__force __virtio32)cpu_to_be32(val); } static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) if (little_endian) return le64_to_cpu((__force __le64)val); else - return (__force u64)val; + return be64_to_cpu((__force __be64)val); } static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) if (little_endian) return (__force __virtio64)cpu_to_le64(val); else - return (__force __virtio64)val; + return (__force __virtio64)cpu_to_be64(val); } #endif /* _LINUX_VIRTIO_BYTEORDER */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 1e306f727edc..e5ce8ab0b8b0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,41 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); +} + /* Memory accessors */ static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) { - return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) { - return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); } static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) { - return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) { - return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); } static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) { - return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { - return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } /* Config space accessors. */ diff --git a/include/linux/vringh.h b/include/linux/vringh.h index a3fa537e717a..bc6c28d04263 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -226,33 +226,39 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline bool vringh_is_little_endian(const struct vringh *vrh) +{ + return vrh->little_endian || + virtio_legacy_is_little_endian(); +} + static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) { - return __virtio16_to_cpu(vrh->little_endian, val); + return __virtio16_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) { - return __cpu_to_virtio16(vrh->little_endian, val); + return __cpu_to_virtio16(vringh_is_little_endian(vrh), val); } static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) { - return __virtio32_to_cpu(vrh->little_endian, val); + return __virtio32_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) { - return __cpu_to_virtio32(vrh->little_endian, val); + return __cpu_to_virtio32(vringh_is_little_endian(vrh), val); } static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) { - return __virtio64_to_cpu(vrh->little_endian, val); + return __virtio64_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { - return __cpu_to_virtio64(vrh->little_endian, val); + return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } #endif /* _LINUX_VRINGH_H */ diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 006983b296dd..34117b8b72e4 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -247,10 +247,6 @@ struct iscsi_conn_ops { u8 DataDigest; /* [0,1] == [None,CRC32C] */ u32 MaxRecvDataSegmentLength; /* [512..2**24-1] */ u32 MaxXmitDataSegmentLength; /* [512..2**24-1] */ - u8 OFMarker; /* [0,1] == [No,Yes] */ - u8 IFMarker; /* [0,1] == [No,Yes] */ - u32 OFMarkInt; /* [1..65535] */ - u32 IFMarkInt; /* [1..65535] */ /* * iSER specific connection parameters */ @@ -531,12 +527,6 @@ struct iscsi_conn { u32 exp_statsn; /* Per connection status sequence number */ u32 stat_sn; - /* IFMarkInt's Current Value */ - u32 if_marker; - /* OFMarkInt's Current Value */ - u32 of_marker; - /* Used for calculating OFMarker offset to next PDU */ - u32 of_marker_offset; #define IPV6_ADDRESS_SPACE 48 unsigned char login_ip[IPV6_ADDRESS_SPACE]; unsigned char local_ip[IPV6_ADDRESS_SPACE]; @@ -754,10 +744,10 @@ struct iscsi_node_stat_grps { }; struct iscsi_node_acl { + struct se_node_acl se_node_acl; struct iscsi_node_attrib node_attrib; struct iscsi_node_auth node_auth; struct iscsi_node_stat_grps node_stat_grps; - struct se_node_acl se_node_acl; }; struct iscsi_tpg_attrib { diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 5f1225706993..1e5c8f949bae 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -3,18 +3,7 @@ #define TRANSPORT_FLAG_PASSTHROUGH 1 -struct target_backend_cits { - struct config_item_type tb_dev_cit; - struct config_item_type tb_dev_attrib_cit; - struct config_item_type tb_dev_pr_cit; - struct config_item_type tb_dev_wwn_cit; - struct config_item_type tb_dev_alua_tg_pt_gps_cit; - struct config_item_type tb_dev_stat_cit; -}; - -struct se_subsystem_api { - struct list_head sub_api_list; - +struct target_backend_ops { char name[16]; char inquiry_prod[16]; char inquiry_rev[4]; @@ -52,7 +41,7 @@ struct se_subsystem_api { int (*format_prot)(struct se_device *); void (*free_prot)(struct se_device *); - struct target_backend_cits tb_cits; + struct configfs_attribute **tb_dev_attrib_attrs; }; struct sbc_ops { @@ -60,12 +49,12 @@ struct sbc_ops { u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); - sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); - sense_reason_t (*execute_unmap)(struct se_cmd *cmd); + sense_reason_t (*execute_unmap)(struct se_cmd *cmd, + sector_t lba, sector_t nolb); }; -int transport_subsystem_register(struct se_subsystem_api *); -void transport_subsystem_release(struct se_subsystem_api *); +int transport_backend_register(const struct target_backend_ops *); +void target_backend_unregister(const struct target_backend_ops *); void target_complete_cmd(struct se_cmd *, u8); void target_complete_cmd_with_length(struct se_cmd *, u8, int); @@ -79,22 +68,19 @@ sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops); u32 sbc_get_device_rev(struct se_device *dev); u32 sbc_get_device_type(struct se_device *dev); sector_t sbc_get_write_same_sectors(struct se_cmd *cmd); -sense_reason_t sbc_execute_unmap(struct se_cmd *cmd, - sense_reason_t (*do_unmap_fn)(struct se_cmd *cmd, void *priv, - sector_t lba, sector_t nolb), - void *priv); void sbc_dif_generate(struct se_cmd *); -sense_reason_t sbc_dif_verify_write(struct se_cmd *, sector_t, unsigned int, +sense_reason_t sbc_dif_verify(struct se_cmd *, sector_t, unsigned int, unsigned int, struct scatterlist *, int); -sense_reason_t sbc_dif_verify_read(struct se_cmd *, sector_t, unsigned int, - unsigned int, struct scatterlist *, int); -sense_reason_t sbc_dif_read_strip(struct se_cmd *); - +void sbc_dif_copy_prot(struct se_cmd *, unsigned int, bool, + struct scatterlist *, int); void transport_set_vpd_proto_id(struct t10_vpd *, unsigned char *); int transport_set_vpd_assoc(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident_type(struct t10_vpd *, unsigned char *); int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); +extern struct configfs_attribute *sbc_attrib_attrs[]; +extern struct configfs_attribute *passthrough_attrib_attrs[]; + /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); void transport_kunmap_data_sg(struct se_cmd *); @@ -103,39 +89,7 @@ int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, struct scatterlist *, u32, struct scatterlist *, u32); -void array_free(void *array, int n); - -/* From target_core_configfs.c to setup default backend config_item_types */ -void target_core_setup_sub_cits(struct se_subsystem_api *); - -/* attribute helpers from target_core_device.c for backend drivers */ -bool se_dev_check_wce(struct se_device *); -int se_dev_set_max_unmap_lba_count(struct se_device *, u32); -int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32); -int se_dev_set_unmap_granularity(struct se_device *, u32); -int se_dev_set_unmap_granularity_alignment(struct se_device *, u32); -int se_dev_set_max_write_same_len(struct se_device *, u32); -int se_dev_set_emulate_model_alias(struct se_device *, int); -int se_dev_set_emulate_dpo(struct se_device *, int); -int se_dev_set_emulate_fua_write(struct se_device *, int); -int se_dev_set_emulate_fua_read(struct se_device *, int); -int se_dev_set_emulate_write_cache(struct se_device *, int); -int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); -int se_dev_set_emulate_tas(struct se_device *, int); -int se_dev_set_emulate_tpu(struct se_device *, int); -int se_dev_set_emulate_tpws(struct se_device *, int); -int se_dev_set_emulate_caw(struct se_device *, int); -int se_dev_set_emulate_3pc(struct se_device *, int); -int se_dev_set_pi_prot_type(struct se_device *, int); -int se_dev_set_pi_prot_format(struct se_device *, int); -int se_dev_set_enforce_pr_isids(struct se_device *, int); -int se_dev_set_force_pr_aptpl(struct se_device *, int); -int se_dev_set_is_nonrot(struct se_device *, int); -int se_dev_set_emulate_rest_reord(struct se_device *dev, int); -int se_dev_set_queue_depth(struct se_device *, u32); -int se_dev_set_max_sectors(struct se_device *, u32); -int se_dev_set_optimal_sectors(struct se_device *, u32); -int se_dev_set_block_size(struct se_device *, u32); +bool target_lun_is_rdonly(struct se_cmd *); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); diff --git a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h deleted file mode 100644 index 186f7a923570..000000000000 --- a/include/target/target_core_backend_configfs.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef TARGET_CORE_BACKEND_CONFIGFS_H -#define TARGET_CORE_BACKEND_CONFIGFS_H - -#include <target/configfs_macros.h> - -#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name) \ -static ssize_t _backend##_dev_show_attr_##_name( \ - struct se_dev_attrib *da, \ - char *page) \ -{ \ - return snprintf(page, PAGE_SIZE, "%u\n", \ - (u32)da->da_dev->dev_attrib._name); \ -} - -#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name) \ -static ssize_t _backend##_dev_store_attr_##_name( \ - struct se_dev_attrib *da, \ - const char *page, \ - size_t count) \ -{ \ - unsigned long val; \ - int ret; \ - \ - ret = kstrtoul(page, 0, &val); \ - if (ret < 0) { \ - pr_err("kstrtoul() failed with ret: %d\n", ret); \ - return -EINVAL; \ - } \ - ret = se_dev_set_##_name(da->da_dev, (u32)val); \ - \ - return (!ret) ? count : -EINVAL; \ -} - -#define DEF_TB_DEV_ATTRIB(_backend, _name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, _name); \ -DEF_TB_DEV_ATTRIB_STORE(_backend, _name); - -#define DEF_TB_DEV_ATTRIB_RO(_backend, name) \ -DEF_TB_DEV_ATTRIB_SHOW(_backend, name); - -CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib); -#define TB_DEV_ATTR(_backend, _name, _mode) \ -static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ - __CONFIGFS_EATTR(_name, _mode, \ - _backend##_dev_show_attr_##_name, \ - _backend##_dev_store_attr_##_name); - -#define TB_DEV_ATTR_RO(_backend, _name) \ -static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \ - __CONFIGFS_EATTR_RO(_name, \ - _backend##_dev_show_attr_##_name); - -/* - * Default list of target backend device attributes as defined by - * struct se_dev_attrib - */ - -#define DEF_TB_DEFAULT_ATTRIBS(_backend) \ - DEF_TB_DEV_ATTRIB(_backend, emulate_model_alias); \ - TB_DEV_ATTR(_backend, emulate_model_alias, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_dpo); \ - TB_DEV_ATTR(_backend, emulate_dpo, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_fua_write); \ - TB_DEV_ATTR(_backend, emulate_fua_write, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_fua_read); \ - TB_DEV_ATTR(_backend, emulate_fua_read, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_write_cache); \ - TB_DEV_ATTR(_backend, emulate_write_cache, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_ua_intlck_ctrl); \ - TB_DEV_ATTR(_backend, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tas); \ - TB_DEV_ATTR(_backend, emulate_tas, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tpu); \ - TB_DEV_ATTR(_backend, emulate_tpu, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_tpws); \ - TB_DEV_ATTR(_backend, emulate_tpws, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_caw); \ - TB_DEV_ATTR(_backend, emulate_caw, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_3pc); \ - TB_DEV_ATTR(_backend, emulate_3pc, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, pi_prot_type); \ - TB_DEV_ATTR(_backend, pi_prot_type, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_pi_prot_type); \ - TB_DEV_ATTR_RO(_backend, hw_pi_prot_type); \ - DEF_TB_DEV_ATTRIB(_backend, pi_prot_format); \ - TB_DEV_ATTR(_backend, pi_prot_format, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, enforce_pr_isids); \ - TB_DEV_ATTR(_backend, enforce_pr_isids, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, is_nonrot); \ - TB_DEV_ATTR(_backend, is_nonrot, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, emulate_rest_reord); \ - TB_DEV_ATTR(_backend, emulate_rest_reord, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, force_pr_aptpl); \ - TB_DEV_ATTR(_backend, force_pr_aptpl, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_block_size); \ - TB_DEV_ATTR_RO(_backend, hw_block_size); \ - DEF_TB_DEV_ATTRIB(_backend, block_size); \ - TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors); \ - TB_DEV_ATTR_RO(_backend, hw_max_sectors); \ - DEF_TB_DEV_ATTRIB(_backend, optimal_sectors); \ - TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth); \ - TB_DEV_ATTR_RO(_backend, hw_queue_depth); \ - DEF_TB_DEV_ATTRIB(_backend, queue_depth); \ - TB_DEV_ATTR(_backend, queue_depth, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_unmap_lba_count); \ - TB_DEV_ATTR(_backend, max_unmap_lba_count, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_unmap_block_desc_count); \ - TB_DEV_ATTR(_backend, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, unmap_granularity); \ - TB_DEV_ATTR(_backend, unmap_granularity, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, unmap_granularity_alignment); \ - TB_DEV_ATTR(_backend, unmap_granularity_alignment, S_IRUGO | S_IWUSR); \ - DEF_TB_DEV_ATTRIB(_backend, max_write_same_len); \ - TB_DEV_ATTR(_backend, max_write_same_len, S_IRUGO | S_IWUSR); - -#endif /* TARGET_CORE_BACKEND_CONFIGFS_H */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index aec6f6a4477c..17ae2d6a4891 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -9,12 +9,8 @@ #include <net/sock.h> #include <net/tcp.h> -#define TARGET_CORE_MOD_VERSION "v4.1.0" -#define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION +#define TARGET_CORE_VERSION "v5.0" -/* Maximum Number of LUNs per Target Portal Group */ -/* Don't raise above 511 or REPORT_LUNS needs to handle >1 page */ -#define TRANSPORT_MAX_LUNS_PER_TPG 256 /* * Maximum size of a CDB that can be stored in se_cmd without allocating * memory dynamically for the CDB. @@ -70,12 +66,6 @@ #define DA_MAX_WRITE_SAME_LEN 0 /* Use a model alias based on the configfs backend device name */ #define DA_EMULATE_MODEL_ALIAS 0 -/* Emulation for Direct Page Out */ -#define DA_EMULATE_DPO 0 -/* Emulation for Forced Unit Access WRITEs */ -#define DA_EMULATE_FUA_WRITE 1 -/* Emulation for Forced Unit Access READs */ -#define DA_EMULATE_FUA_READ 0 /* Emulation for WriteCache and SYNCHRONIZE_CACHE */ #define DA_EMULATE_WRITE_CACHE 0 /* Emulation for UNIT ATTENTION Interlock Control */ @@ -116,18 +106,6 @@ enum hba_flags_table { HBA_FLAGS_PSCSI_MODE = 0x02, }; -/* struct se_lun->lun_status */ -enum transport_lun_status_table { - TRANSPORT_LUN_STATUS_FREE = 0, - TRANSPORT_LUN_STATUS_ACTIVE = 1, -}; - -/* struct se_portal_group->se_tpg_type */ -enum transport_tpg_type_table { - TRANSPORT_TPG_TYPE_NORMAL = 0, - TRANSPORT_TPG_TYPE_DISCOVERY = 1, -}; - /* Special transport agnostic struct se_cmd->t_states */ enum transport_state_table { TRANSPORT_NO_STATE = 0, @@ -158,14 +136,13 @@ enum se_cmd_flags_table { SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ enum transport_lunflags_table { - TRANSPORT_LUNFLAGS_NO_ACCESS = 0x00, - TRANSPORT_LUNFLAGS_INITIATOR_ACCESS = 0x01, - TRANSPORT_LUNFLAGS_READ_ONLY = 0x02, - TRANSPORT_LUNFLAGS_READ_WRITE = 0x04, + TRANSPORT_LUNFLAGS_READ_ONLY = 0x01, + TRANSPORT_LUNFLAGS_READ_WRITE = 0x02, }; /* @@ -314,22 +291,13 @@ struct t10_alua_tg_pt_gp { struct se_device *tg_pt_gp_dev; struct config_group tg_pt_gp_group; struct list_head tg_pt_gp_list; - struct list_head tg_pt_gp_mem_list; - struct se_port *tg_pt_gp_alua_port; + struct list_head tg_pt_gp_lun_list; + struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; struct delayed_work tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; -struct t10_alua_tg_pt_gp_member { - bool tg_pt_gp_assoc; - atomic_t tg_pt_gp_mem_ref_cnt; - spinlock_t tg_pt_gp_mem_lock; - struct t10_alua_tg_pt_gp *tg_pt_gp; - struct se_port *tg_pt; - struct list_head tg_pt_gp_mem_list; -}; - struct t10_vpd { unsigned char device_identifier[INQUIRY_VPD_DEVICE_IDENTIFIER_LEN]; int protocol_identifier_set; @@ -374,15 +342,16 @@ struct t10_pr_registration { int pr_res_scope; /* Used for fabric initiator WWPNs using a ISID */ bool isid_present_at_reg; - u32 pr_res_mapped_lun; - u32 pr_aptpl_target_lun; + u64 pr_res_mapped_lun; + u64 pr_aptpl_target_lun; + u16 tg_pt_sep_rtpi; u32 pr_res_generation; u64 pr_reg_bin_isid; u64 pr_res_key; atomic_t pr_res_holders; struct se_node_acl *pr_reg_nacl; + /* Used by ALL_TG_PT=1 registration with deve->pr_ref taken */ struct se_dev_entry *pr_reg_deve; - struct se_lun *pr_reg_tg_pt_lun; struct list_head pr_reg_list; struct list_head pr_reg_abort_list; struct list_head pr_reg_aptpl_list; @@ -422,7 +391,7 @@ struct se_tmr_req { u8 response; int call_transport; /* Reference to ITT that Task Mgmt should be performed */ - u32 ref_task_tag; + u64 ref_task_tag; void *fabric_tmr_ptr; struct se_cmd *task_cmd; struct se_device *tmr_dev; @@ -475,6 +444,7 @@ struct se_cmd { u8 scsi_asc; u8 scsi_ascq; u16 scsi_sense_length; + u64 tag; /* SAM command identifier aka task tag */ /* Delay for ALUA Active/NonOptimized state access in milliseconds */ int alua_nonop_delay; /* See include/linux/dma-mapping.h */ @@ -493,7 +463,7 @@ struct se_cmd { /* Total size in bytes associated with command */ u32 data_length; u32 residual_count; - u32 orig_fe_lun; + u64 orig_fe_lun; /* Persistent Reservation key */ u64 pr_res_key; /* Used for sense data */ @@ -501,7 +471,6 @@ struct se_cmd { struct list_head se_delayed_node; struct list_head se_qf_node; struct se_device *se_dev; - struct se_dev_entry *se_deve; struct se_lun *se_lun; /* Only used for internal passthrough and legacy TCM fabric modules */ struct se_session *se_sess; @@ -511,9 +480,8 @@ struct se_cmd { struct kref cmd_kref; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, - u32, enum dma_data_direction); sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool); + void *protocol_data; unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; @@ -569,7 +537,6 @@ struct se_cmd { struct se_ua { u8 ua_asc; u8 ua_ascq; - struct se_node_acl *ua_nacl; struct list_head ua_nacl_list; }; @@ -585,10 +552,10 @@ struct se_node_acl { char acl_tag[MAX_ACL_TAG_SIZE]; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ atomic_t acl_pr_ref_count; - struct se_dev_entry **device_list; + struct hlist_head lun_entry_hlist; struct se_session *nacl_sess; struct se_portal_group *se_tpg; - spinlock_t device_list_lock; + struct mutex lun_entry_mutex; spinlock_t nacl_sess_lock; struct config_group acl_group; struct config_group acl_attrib_group; @@ -632,33 +599,37 @@ struct se_ml_stat_grps { struct se_lun_acl { char initiatorname[TRANSPORT_IQN_LEN]; - u32 mapped_lun; + u64 mapped_lun; struct se_node_acl *se_lun_nacl; struct se_lun *se_lun; - struct list_head lacl_list; struct config_group se_lun_group; struct se_ml_stat_grps ml_stat_grps; }; struct se_dev_entry { - bool def_pr_registered; /* See transport_lunflags_table */ - u32 lun_flags; - u32 mapped_lun; - u32 total_cmds; + u64 mapped_lun; u64 pr_res_key; u64 creation_time; + u32 lun_flags; u32 attach_count; - u64 read_bytes; - u64 write_bytes; + atomic_long_t total_cmds; + atomic_long_t read_bytes; + atomic_long_t write_bytes; atomic_t ua_count; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ - atomic_t pr_ref_count; - struct se_lun_acl *se_lun_acl; + struct kref pr_kref; + struct completion pr_comp; + struct se_lun_acl __rcu *se_lun_acl; spinlock_t ua_lock; - struct se_lun *se_lun; + struct se_lun __rcu *se_lun; +#define DEF_PR_REG_ACTIVE 1 + unsigned long deve_flags; struct list_head alua_port_list; + struct list_head lun_link; struct list_head ua_list; + struct hlist_node link; + struct rcu_head rcu_head; }; struct se_dev_attrib { @@ -703,25 +674,48 @@ struct se_port_stat_grps { struct config_group scsi_transport_group; }; +struct scsi_port_stats { + atomic_long_t cmd_pdus; + atomic_long_t tx_data_octets; + atomic_long_t rx_data_octets; +}; + struct se_lun { + u64 unpacked_lun; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; - /* See transport_lun_status_table */ - enum transport_lun_status_table lun_status; u32 lun_access; u32 lun_flags; - u32 unpacked_lun; + u32 lun_index; + + /* RELATIVE TARGET PORT IDENTIFER */ + u16 lun_rtpi; atomic_t lun_acl_count; - spinlock_t lun_acl_lock; - spinlock_t lun_sep_lock; - struct completion lun_shutdown_comp; - struct list_head lun_acl_list; - struct se_device *lun_se_dev; - struct se_port *lun_sep; + struct se_device __rcu *lun_se_dev; + + struct list_head lun_deve_list; + spinlock_t lun_deve_lock; + + /* ALUA state */ + int lun_tg_pt_secondary_stat; + int lun_tg_pt_secondary_write_md; + atomic_t lun_tg_pt_secondary_offline; + struct mutex lun_tg_pt_md_mutex; + + /* ALUA target port group linkage */ + struct list_head lun_tg_pt_gp_link; + struct t10_alua_tg_pt_gp *lun_tg_pt_gp; + spinlock_t lun_tg_pt_gp_lock; + + struct se_portal_group *lun_tpg; + struct scsi_port_stats lun_stats; struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; struct percpu_ref lun_ref; + struct list_head lun_dev_link; + struct hlist_node link; + struct rcu_head rcu_head; }; struct se_dev_stat_grps { @@ -744,7 +738,6 @@ struct se_device { #define DF_EMULATED_VPD_UNIT_SERIAL 0x00000004 #define DF_USING_UDEV_PATH 0x00000008 #define DF_USING_ALIAS 0x00000010 - u32 dev_port_count; /* Physical device queue depth */ u32 queue_depth; /* Used for SPC-2 reservations enforce of ISIDs */ @@ -761,7 +754,7 @@ struct se_device { atomic_t dev_ordered_id; atomic_t dev_ordered_sync; atomic_t dev_qf_count; - int export_count; + u32 export_count; spinlock_t delayed_cmd_lock; spinlock_t execute_task_lock; spinlock_t dev_reservation_lock; @@ -803,12 +796,15 @@ struct se_device { #define SE_UDEV_PATH_LEN 512 /* must be less than PAGE_SIZE */ unsigned char udev_path[SE_UDEV_PATH_LEN]; /* Pointer to template of function pointers for transport */ - struct se_subsystem_api *transport; + const struct target_backend_ops *transport; /* Linked list for struct se_hba struct se_device list */ struct list_head dev_list; struct se_lun xcopy_lun; /* Protection Information */ int prot_length; + /* For se_lun->lun_se_dev RCU read-side critical access */ + u32 hba_index; + struct rcu_head rcu_head; }; struct se_hba { @@ -825,33 +821,7 @@ struct se_hba { spinlock_t device_lock; struct config_group hba_group; struct mutex hba_access_mutex; - struct se_subsystem_api *transport; -}; - -struct scsi_port_stats { - u64 cmd_pdus; - u64 tx_data_octets; - u64 rx_data_octets; -}; - -struct se_port { - /* RELATIVE TARGET PORT IDENTIFER */ - u16 sep_rtpi; - int sep_tg_pt_secondary_stat; - int sep_tg_pt_secondary_write_md; - u32 sep_index; - struct scsi_port_stats sep_stats; - /* Used for ALUA Target Port Groups membership */ - atomic_t sep_tg_pt_secondary_offline; - /* Used for PR ALL_TG_PT=1 */ - atomic_t sep_tg_pt_ref_cnt; - spinlock_t sep_alua_lock; - struct mutex sep_tg_pt_md_mutex; - struct t10_alua_tg_pt_gp_member *sep_alua_tg_pt_gp_mem; - struct se_lun *sep_lun; - struct se_portal_group *sep_tpg; - struct list_head sep_alua_list; - struct list_head sep_list; + struct target_backend *backend; }; struct se_tpg_np { @@ -860,24 +830,26 @@ struct se_tpg_np { }; struct se_portal_group { - /* Type of target portal group, see transport_tpg_type_table */ - enum transport_tpg_type_table se_tpg_type; + /* + * PROTOCOL IDENTIFIER value per SPC4, 7.5.1. + * + * Negative values can be used by fabric drivers for internal use TPGs. + */ + int proto_id; /* Number of ACLed Initiator Nodes for this TPG */ u32 num_node_acls; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ atomic_t tpg_pr_ref_count; /* Spinlock for adding/removing ACLed Nodes */ - spinlock_t acl_node_lock; + struct mutex acl_node_mutex; /* Spinlock for adding/removing sessions */ spinlock_t session_lock; - spinlock_t tpg_lun_lock; - /* Pointer to $FABRIC_MOD portal group */ - void *se_tpg_fabric_ptr; + struct mutex tpg_lun_mutex; struct list_head se_tpg_node; /* linked list for initiator ACL list */ struct list_head acl_node_list; - struct se_lun **tpg_lun_list; - struct se_lun tpg_virt_lun0; + struct hlist_head tpg_lun_hlist; + struct se_lun *tpg_virt_lun0; /* List of TCM sessions associated wth this TPG */ struct list_head tpg_sess_list; /* Pointer to $FABRIC_MOD dependent code */ diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h deleted file mode 100644 index b99c01170392..000000000000 --- a/include/target/target_core_configfs.h +++ /dev/null @@ -1,48 +0,0 @@ -#define TARGET_CORE_CONFIGFS_VERSION TARGET_CORE_MOD_VERSION - -#define TARGET_CORE_CONFIG_ROOT "/sys/kernel/config" - -#define TARGET_CORE_NAME_MAX_LEN 64 -#define TARGET_FABRIC_NAME_SIZE 32 - -struct target_fabric_configfs_template { - struct config_item_type tfc_discovery_cit; - struct config_item_type tfc_wwn_cit; - struct config_item_type tfc_wwn_fabric_stats_cit; - struct config_item_type tfc_tpg_cit; - struct config_item_type tfc_tpg_base_cit; - struct config_item_type tfc_tpg_lun_cit; - struct config_item_type tfc_tpg_port_cit; - struct config_item_type tfc_tpg_port_stat_cit; - struct config_item_type tfc_tpg_np_cit; - struct config_item_type tfc_tpg_np_base_cit; - struct config_item_type tfc_tpg_attrib_cit; - struct config_item_type tfc_tpg_auth_cit; - struct config_item_type tfc_tpg_param_cit; - struct config_item_type tfc_tpg_nacl_cit; - struct config_item_type tfc_tpg_nacl_base_cit; - struct config_item_type tfc_tpg_nacl_attrib_cit; - struct config_item_type tfc_tpg_nacl_auth_cit; - struct config_item_type tfc_tpg_nacl_param_cit; - struct config_item_type tfc_tpg_nacl_stat_cit; - struct config_item_type tfc_tpg_mappedlun_cit; - struct config_item_type tfc_tpg_mappedlun_stat_cit; -}; - -struct target_fabric_configfs { - char tf_name[TARGET_FABRIC_NAME_SIZE]; - atomic_t tf_access_cnt; - struct list_head tf_list; - struct config_group tf_group; - struct config_group tf_disc_group; - struct config_group *tf_default_groups[2]; - /* Pointer to fabric's config_item */ - struct config_item *tf_fabric; - /* Passed from fabric modules */ - struct config_item_type *tf_fabric_cit; - /* Pointer to fabric's struct module */ - struct module *tf_module; - struct target_core_fabric_ops tf_ops; - struct target_fabric_configfs_template tf_cit_tmpl; -}; - diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 0f4dc3768587..18afef91b447 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -4,20 +4,11 @@ struct target_core_fabric_ops { struct module *module; const char *name; + size_t node_acl_size; char *(*get_fabric_name)(void); - u8 (*get_fabric_proto_ident)(struct se_portal_group *); char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); u32 (*tpg_get_default_depth)(struct se_portal_group *); - u32 (*tpg_get_pr_transport_id)(struct se_portal_group *, - struct se_node_acl *, - struct t10_pr_registration *, int *, - unsigned char *); - u32 (*tpg_get_pr_transport_id_len)(struct se_portal_group *, - struct se_node_acl *, - struct t10_pr_registration *, int *); - char *(*tpg_parse_pr_out_transport_id)(struct se_portal_group *, - const char *, u32 *, char **); int (*tpg_check_demo_mode)(struct se_portal_group *); int (*tpg_check_demo_mode_cache)(struct se_portal_group *); int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *); @@ -36,10 +27,6 @@ struct target_core_fabric_ops { * WRITE_STRIP and READ_INSERT operations. */ int (*tpg_check_prot_fabric_only)(struct se_portal_group *); - struct se_node_acl *(*tpg_alloc_fabric_acl)( - struct se_portal_group *); - void (*tpg_release_fabric_acl)(struct se_portal_group *, - struct se_node_acl *); u32 (*tpg_get_inst_index)(struct se_portal_group *); /* * Optional to release struct se_cmd and fabric dependent allocated @@ -50,7 +37,6 @@ struct target_core_fabric_ops { */ int (*check_stop_free)(struct se_cmd *); void (*release_cmd)(struct se_cmd *); - void (*put_session)(struct se_session *); /* * Called with spin_lock_bh(struct se_portal_group->session_lock held. */ @@ -66,7 +52,6 @@ struct target_core_fabric_ops { int (*write_pending)(struct se_cmd *); int (*write_pending_status)(struct se_cmd *); void (*set_default_node_attributes)(struct se_node_acl *); - u32 (*get_task_tag)(struct se_cmd *); int (*get_cmd_state)(struct se_cmd *); int (*queue_data_in)(struct se_cmd *); int (*queue_status)(struct se_cmd *); @@ -88,9 +73,8 @@ struct target_core_fabric_ops { struct se_tpg_np *(*fabric_make_np)(struct se_portal_group *, struct config_group *, const char *); void (*fabric_drop_np)(struct se_tpg_np *); - struct se_node_acl *(*fabric_make_nodeacl)(struct se_portal_group *, - struct config_group *, const char *); - void (*fabric_drop_nodeacl)(struct se_node_acl *); + int (*fabric_init_nodeacl)(struct se_node_acl *, const char *); + void (*fabric_cleanup_nodeacl)(struct se_node_acl *); struct configfs_attribute **tfc_discovery_attrs; struct configfs_attribute **tfc_wwn_attrs; @@ -132,16 +116,16 @@ void transport_deregister_session(struct se_session *); void transport_init_se_cmd(struct se_cmd *, const struct target_core_fabric_ops *, struct se_session *, u32, int, int, unsigned char *); -sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u32); +sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u64); sense_reason_t target_setup_cmd_from_cdb(struct se_cmd *, unsigned char *); int target_submit_cmd_map_sgls(struct se_cmd *, struct se_session *, - unsigned char *, unsigned char *, u32, u32, int, int, int, + unsigned char *, unsigned char *, u64, u32, int, int, int, struct scatterlist *, u32, struct scatterlist *, u32, struct scatterlist *, u32); int target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *, - unsigned char *, u32, u32, int, int, int); + unsigned char *, u64, u32, int, int, int); int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, - unsigned char *sense, u32 unpacked_lun, + unsigned char *sense, u64 unpacked_lun, void *fabric_tmr_ptr, unsigned char tm_type, gfp_t, unsigned int, int); int transport_handle_cdb_direct(struct se_cmd *); @@ -155,8 +139,8 @@ bool transport_wait_for_tasks(struct se_cmd *); int transport_check_aborted_status(struct se_cmd *, int); int transport_send_check_condition_and_sense(struct se_cmd *, sense_reason_t, int); -int target_get_sess_cmd(struct se_session *, struct se_cmd *, bool); -int target_put_sess_cmd(struct se_session *, struct se_cmd *); +int target_get_sess_cmd(struct se_cmd *, bool); +int target_put_sess_cmd(struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); void target_wait_for_sess_cmds(struct se_session *); @@ -167,52 +151,19 @@ void core_tmr_release_req(struct se_tmr_req *); int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); void __target_execute_cmd(struct se_cmd *); -int transport_lookup_tmr_lun(struct se_cmd *, u32); +int transport_lookup_tmr_lun(struct se_cmd *, u64); struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg, unsigned char *); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, unsigned char *); -void core_tpg_clear_object_luns(struct se_portal_group *); -struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *, - struct se_node_acl *, const char *, u32); -int core_tpg_del_initiator_node_acl(struct se_portal_group *, - struct se_node_acl *, int); int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *, unsigned char *, u32, int); int core_tpg_set_initiator_node_tag(struct se_portal_group *, struct se_node_acl *, const char *); -int core_tpg_register(const struct target_core_fabric_ops *, - struct se_wwn *, struct se_portal_group *, void *, int); +int core_tpg_register(struct se_wwn *, struct se_portal_group *, int); int core_tpg_deregister(struct se_portal_group *); -/* SAS helpers */ -u8 sas_get_fabric_proto_ident(struct se_portal_group *); -u32 sas_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 sas_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *sas_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - -/* FC helpers */ -u8 fc_get_fabric_proto_ident(struct se_portal_group *); -u32 fc_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 fc_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *fc_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - -/* iSCSI helpers */ -u8 iscsi_get_fabric_proto_ident(struct se_portal_group *); -u32 iscsi_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *, unsigned char *); -u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *, - struct t10_pr_registration *, int *); -char *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *, - u32 *, char **); - /* * The LIO target core uses DMA_TO_DEVICE to mean that data is going * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d3f4832db289..b6fce900a833 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + /* Mapping flags */ /* readable mapping */ #define AMDGPU_VM_PAGE_READABLE (1 << 1) @@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { uint32_t chunk_id; @@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib { uint32_t ring; }; +struct drm_amdgpu_cs_chunk_dep { + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint32_t ctx_id; + uint64_t handle; +}; + struct drm_amdgpu_cs_chunk_fence { uint32_t handle; uint32_t offset; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 25084a052a1e..c9aca042e61d 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -755,4 +755,7 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +/* Device ioctls: */ +#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) + #endif /* _LINUX_FUSE_H */ diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 50ae24335444..3cb5e1d85ddd 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -50,6 +50,12 @@ #define TUNGETFILTER _IOR('T', 219, struct sock_fprog) #define TUNSETVNETLE _IOW('T', 220, int) #define TUNGETVNETLE _IOR('T', 221, int) +/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on + * little-endian hosts. Not all kernel configurations support them, but all + * configurations that support SET also support GET. + */ +#define TUNSETVNETBE _IOW('T', 222, int) +#define TUNGETVNETBE _IOR('T', 223, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bb6a5b4cb3c5..ab3731917bac 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -103,6 +103,20 @@ struct vhost_memory { /* Get accessor: reads index, writes value in num */ #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN + * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL). + * The byte order cannot be changed while the device is active: trying to do so + * returns -EBUSY. + * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is + * set. + * Not all kernel configurations support this ioctl, but all configurations that + * support SET also support GET. + */ +#define VHOST_VRING_LITTLE_ENDIAN 0 +#define VHOST_VRING_BIG_ENDIAN 1 +#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) +#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) + /* The following ioctls use eventfd file descriptors to signal and poll * for events. */ diff --git a/init/Kconfig b/init/Kconfig index bcc41bd19999..af09b4fb43d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -435,6 +435,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting" depends on TASKSTATS + select SCHED_INFO help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping @@ -820,7 +821,7 @@ config IKCONFIG_PROC config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - range 12 21 + range 12 25 default 17 depends on PRINTK help diff --git a/kernel/Makefile b/kernel/Makefile index 60c302cfb4d3..43c4c920f30a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -137,7 +137,7 @@ endif ifneq ($(wildcard $(obj)/.x509.list),) ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES)) -$(info X.509 certificate list changed) +$(warning X.509 certificate list changed to "$(X509_CERTIFICATES)" from "$(shell cat $(obj)/.x509.list)") $(shell rm $(obj)/.x509.list) endif endif diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 09c65640cad6..e85bdfd15fed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { - WARN_ON(1); + if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9ef9fc8a774b..f89d9292eee6 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1939,8 +1939,6 @@ static struct file_system_type cgroup_fs_type = { .kill_sb = cgroup_kill_sb, }; -static struct kobject *cgroup_kobj; - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -5070,13 +5068,13 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } - cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); - if (!cgroup_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "cgroup"); + if (err) + return err; err = register_filesystem(&cgroup_fs_type); if (err < 0) { - kobject_put(cgroup_kobj); + sysfs_remove_mount_point(fs_kobj, "cgroup"); return err; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..6a374544d495 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,7 @@ #include <linux/suspend.h> #include <linux/lockdep.h> #include <linux/tick.h> +#include <linux/irq.h> #include <trace/events/power.h> #include "smpboot.h" @@ -392,13 +393,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smpboot_park_threads(cpu); /* - * So now all preempt/rcu users must observe !cpu_active(). + * Prevent irq alloc/free while the dying cpu reorganizes the + * interrupt affinities. */ + irq_lock_sparse(); + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + irq_unlock_sparse(); goto out_release; } BUG_ON(cpu_online(cpu)); @@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ per_cpu(cpu_dead_idle, cpu) = false; + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -517,8 +527,18 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; } + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); + + irq_unlock_sparse(); + if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); diff --git a/kernel/events/core.c b/kernel/events/core.c index d1f37ddd1960..d3dae3419b99 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4358,14 +4358,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; @@ -5794,7 +5786,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64)); + name = file_path(file, buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c7a40d..2bbad9c1274c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 96472824a752..b2be01b1aa9d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -557,7 +569,18 @@ static void __rb_free_aux(struct ring_buffer *rb) void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee828c41..61008b8433ab 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } -extern void irq_lock_sparse(void); -extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); -static inline void irq_lock_sparse(void) { } -static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); diff --git a/kernel/module.c b/kernel/module.c index 3e0e19763d24..4d2b82e610e2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3557,6 +3557,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_sched(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b803e1b8ab0c..78b4bad10081 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2164,7 +2164,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) set_task_cpu(p, cpu); raw_spin_unlock_irqrestore(&p->pi_lock, flags); -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif @@ -2320,13 +2320,27 @@ void wake_up_new_task(struct task_struct *p) static struct static_key preempt_notifier_key = STATIC_KEY_INIT_FALSE; +void preempt_notifier_inc(void) +{ + static_key_slow_inc(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_inc); + +void preempt_notifier_dec(void) +{ + static_key_slow_dec(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_dec); + /** * preempt_notifier_register - tell me when current is being preempted & rescheduled * @notifier: notifier struct to register */ void preempt_notifier_register(struct preempt_notifier *notifier) { - static_key_slow_inc(&preempt_notifier_key); + if (!static_key_false(&preempt_notifier_key)) + WARN(1, "registering preempt_notifier while notifiers disabled\n"); + hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); } EXPORT_SYMBOL_GPL(preempt_notifier_register); @@ -2340,7 +2354,6 @@ EXPORT_SYMBOL_GPL(preempt_notifier_register); void preempt_notifier_unregister(struct preempt_notifier *notifier) { hlist_del(¬ifier->link); - static_key_slow_dec(&preempt_notifier_key); } EXPORT_SYMBOL_GPL(preempt_notifier_unregister); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 315c68e015d9..4222ec50ab88 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 0LL, 0L); #endif #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d", task_node(p)); + SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED SEQ_printf(m, " %s", task_group_path(task_group(p))); @@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs); SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) +#ifdef CONFIG_NUMA_BALANCING +void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, + unsigned long tpf, unsigned long gsf, unsigned long gpf) +{ + SEQ_printf(m, "numa_faults node=%d ", node); + SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf); + SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf); +} +#endif + + static void sched_show_numa(struct task_struct *p, struct seq_file *m) { #ifdef CONFIG_NUMA_BALANCING struct mempolicy *pol; - int node, i; if (p->mm) P(mm->numa_scan_seq); @@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) mpol_get(pol); task_unlock(p); - SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0)); - - for_each_online_node(node) { - for (i = 0; i < 2; i++) { - unsigned long nr_faults = -1; - int cpu_current, home_node; - - if (p->numa_faults) - nr_faults = p->numa_faults[2*node + i]; - - cpu_current = !i ? (task_node(p) == node) : - (pol && node_isset(node, pol->v.nodes)); - - home_node = (p->numa_preferred_nid == node); - - SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n", - i, node, cpu_current, home_node, nr_faults); - } - } - + P(numa_pages_migrated); + P(numa_preferred_nid); + P(total_numa_faults); + SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", + task_node(p), task_numa_group_id(p)); + show_numa_stats(p, m); mpol_put(pol); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3d57cc0ca0a6..65c8f3ebdc3c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu) print_cfs_rq(m, cpu, cfs_rq); rcu_read_unlock(); } -#endif + +#ifdef CONFIG_NUMA_BALANCING +void show_numa_stats(struct task_struct *p, struct seq_file *m) +{ + int node; + unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0; + + for_each_online_node(node) { + if (p->numa_faults) { + tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; + tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)]; + } + if (p->numa_group) { + gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)], + gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)]; + } + print_numa_stats(m, node, tsf, tpf, gsf, gpf); + } +} +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ __init void init_sched_fair_class(void) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 885889190a1f..84d48790bb6d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1689,9 +1689,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); + +#ifdef CONFIG_SCHED_DEBUG extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); +extern void +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); + +#ifdef CONFIG_NUMA_BALANCING +extern void +show_numa_stats(struct task_struct *p, struct seq_file *m); +extern void +print_numa_stats(struct seq_file *m, int node, unsigned long tsf, + unsigned long tpf, unsigned long gsf, unsigned long gpf); +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ extern void init_cfs_rq(struct cfs_rq *cfs_rq); extern void init_rt_rq(struct rt_rq *rt_rq); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 077ebbd5e10f..b0fbc7632de5 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) # define schedstat_set(var, val) do { } while (0) #endif -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO static inline void sched_info_reset_dequeued(struct task_struct *t) { t->sched_info.last_queued = 0; @@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq, #define sched_info_depart(rq, t) do { } while (0) #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) -#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ +#endif /* CONFIG_SCHED_INFO */ /* * The following are functions that support scheduler-internal time accounting. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 812fcc3fd390..19b62b522158 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1538,12 +1538,6 @@ static struct ctl_table vm_table[] = { { } }; -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) -static struct ctl_table binfmt_misc_table[] = { - { } -}; -#endif - static struct ctl_table fs_table[] = { { .procname = "inode-nr", @@ -1697,7 +1691,7 @@ static struct ctl_table fs_table[] = { { .procname = "binfmt_misc", .mode = 0555, - .child = binfmt_misc_table, + .child = sysctl_mount_point, }, #endif { diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 08ccc3da3ca0..50eb107f1198 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev, /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: - return dev->set_state_shutdown(dev); + if (dev->set_state_shutdown) + return dev->set_state_shutdown(dev); + return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_state_periodic(dev); + if (dev->set_state_periodic) + return dev->set_state_periodic(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_state_oneshot(dev); + if (dev->set_state_oneshot) + return dev->set_state_oneshot(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ @@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New state-specific callbacks */ - if (!dev->set_state_shutdown) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_state_periodic) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_state_oneshot) - return -EINVAL; - return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d39f32cdd1b5..52b9e199b5ac 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret; + int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt - * is delivered by the broadcast device. + * is delivered by the broadcast device, if + * the broadcast device exists and is not + * hrtimer based. */ - ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: - /* Nothing to do */ - ret = 0; break; } } @@ -265,8 +266,22 @@ static bool tick_do_broadcast(struct cpumask *mask) * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; + cpumask_clear_cpu(cpu, mask); - local = true; + /* + * We only run the local handler, if the broadcast + * device is not hrtimer based. Otherwise we run into + * a hrtimer recursion. + * + * local timer_interrupt() + * local_handler() + * expire_hrtimers() + * bc_handler() + * local_handler() + * expire_hrtimers() + */ + local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { @@ -301,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + + /* Handle spurious interrupts gracefully */ + if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { + raw_spin_unlock(&tick_broadcast_lock); + return; + } + bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { @@ -359,8 +381,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { - if (tick_broadcast_device.mode == - TICKDEV_MODE_PERIODIC) + /* + * Only shutdown the cpu local device, if: + * + * - the broadcast device exists + * - the broadcast device is not a hrtimer based one + * - the broadcast device is in periodic mode to + * avoid a hickup during switch to oneshot mode + */ + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && + tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; @@ -379,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) break; } - if (cpumask_empty(tick_broadcast_mask)) { - if (!bc_stopped) - clockevents_shutdown(bc); - } else if (bc_stopped) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - tick_broadcast_start_periodic(bc); - else - tick_broadcast_setup_oneshot(bc); + if (bc) { + if (cpumask_empty(tick_broadcast_mask)) { + if (!bc_stopped) + clockevents_shutdown(bc); + } else if (bc_stopped) { + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); + } } raw_spin_unlock(&tick_broadcast_lock); } @@ -662,71 +694,82 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -/** - * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode - * @state: The target state (enter/exit) - * - * The system enters/leaves a state, where affected devices might stop - * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. - */ -int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; - struct tick_device *td; int cpu, ret = 0; ktime_t now; /* - * Periodic mode does not care about the enter/exit of power - * states + * If there is no broadcast device, tell the caller not to go + * into deep idle. */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return 0; + if (!tick_broadcast_device.evtdev) + return -EBUSY; - /* - * We are called with preemtion disabled from the depth of the - * idle code, so we can't be moved away. - */ - td = this_cpu_ptr(&tick_cpu_device); - dev = td->evtdev; - - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we do not add + * the CPU to the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + goto out; + + /* + * If the broadcast device is in periodic mode, we + * return. + */ + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { + /* If it is a hrtimer based broadcast, return busy */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) + ret = -EBUSY; + goto out; + } + if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); + + /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); + /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be - * woken by the IPI right away. + * woken by the IPI right away; we return + * busy, so the CPU does not try to go deep + * idle. */ - if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && - dev->next_event.tv64 < bc->next_event.tv64) + if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { + ret = -EBUSY; + } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + /* + * In case of hrtimer broadcasts the + * programming might have moved the + * timer to this cpu. If yes, remove + * us from the broadcast mask and + * return busy. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) { + cpumask_clear_cpu(cpu, + tick_broadcast_oneshot_mask); + } + } } - /* - * If the current CPU owns the hrtimer broadcast - * mechanism, it cannot go deep idle and we remove the - * CPU from the broadcast mask. We don't have to go - * through the EXIT path as the local timer is not - * shutdown. - */ - ret = broadcast_needs_cpu(bc, cpu); - if (ret) - cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); @@ -938,6 +981,16 @@ bool tick_broadcast_oneshot_available(void) return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } +#else +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return -EBUSY; + + return 0; +} #endif void __init tick_broadcast_init(void) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 76446cb5dfe1..55e13efff1ab 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -343,6 +343,27 @@ out_bc: tick_install_broadcast_device(newdev); } +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. + */ +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + + if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) + return 0; + + return __tick_broadcast_oneshot_control(state); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 42fdf4958bcc..a4a8d4e9baa1 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu); static inline void tick_cancel_sched_timer(int cpu) { } #endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int +__tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return -EBUSY; +} +#endif + #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b908048f8d6a..e2894b23efb6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -841,9 +841,14 @@ config SCHED_DEBUG that can help debug the scheduler. The runtime overhead of this option is minimal. +config SCHED_INFO + bool + default n + config SCHEDSTATS bool "Collect scheduler statistics" depends on DEBUG_KERNEL && PROC_FS + select SCHED_INFO help If you say Y here, additional code will be inserted into the scheduler and related routines to collect statistics about diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 777eda7d1ab4..39f24d6721e5 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -18,10 +18,6 @@ config KASAN For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. -config KASAN_SHADOW_OFFSET - hex - default 0xdffffc0000000000 if X86_64 - choice prompt "Instrumentation type" depends on KASAN diff --git a/lib/Makefile b/lib/Makefile index ff37c8c2f7b2..6897b527581a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,6 +45,9 @@ CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o +CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index dfe6ec17c0a5..1ad33e555805 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -19,7 +19,7 @@ static struct crypto_shash *crct10dif_tfm; static struct static_key crct10dif_fallback __read_mostly; -__u16 crc_t10dif(const unsigned char *buffer, size_t len) +__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) { struct { struct shash_desc shash; @@ -28,17 +28,23 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len) int err; if (static_key_false(&crct10dif_fallback)) - return crc_t10dif_generic(0, buffer, len); + return crc_t10dif_generic(crc, buffer, len); desc.shash.tfm = crct10dif_tfm; desc.shash.flags = 0; - *(__u16 *)desc.ctx = 0; + *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); BUG_ON(err); return *(__u16 *)desc.ctx; } +EXPORT_SYMBOL(crc_t10dif_update); + +__u16 crc_t10dif(const unsigned char *buffer, size_t len) +{ + return crc_t10dif_update(0, buffer, len); +} EXPORT_SYMBOL(crc_t10dif); static int __init crc_t10dif_mod_init(void) diff --git a/lib/debug_info.c b/lib/debug_info.c new file mode 100644 index 000000000000..2edbe27517ed --- /dev/null +++ b/lib/debug_info.c @@ -0,0 +1,27 @@ +/* + * This file exists solely to ensure debug information for some core + * data structures is included in the final image even for + * CONFIG_DEBUG_INFO_REDUCED. Please do not add actual code. However, + * adding appropriate #includes is fine. + */ +#include <stdarg.h> + +#include <linux/cred.h> +#include <linux/crypto.h> +#include <linux/dcache.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/fscache-cache.h> +#include <linux/io.h> +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <net/addrconf.h> +#include <net/sock.h> +#include <net/tcp.h> diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7756da31b02b..dac5bf59309d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -287,7 +287,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) #define INIT_BW (100 << (20 - PAGE_SHIFT)) static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, - gfp_t gfp) + int blkcg_id, gfp_t gfp) { int i, err; @@ -311,21 +311,29 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); + if (!wb->congested) + return -ENOMEM; + err = fprop_local_init_percpu(&wb->completions, gfp); if (err) - return err; + goto out_put_cong; for (i = 0; i < NR_WB_STAT_ITEMS; i++) { err = percpu_counter_init(&wb->stat[i], 0, gfp); - if (err) { - while (--i) - percpu_counter_destroy(&wb->stat[i]); - fprop_local_destroy_percpu(&wb->completions); - return err; - } + if (err) + goto out_destroy_stat; } return 0; + +out_destroy_stat: + while (--i) + percpu_counter_destroy(&wb->stat[i]); + fprop_local_destroy_percpu(&wb->completions); +out_put_cong: + wb_congested_put(wb->congested); + return err; } /* @@ -361,6 +369,7 @@ static void wb_exit(struct bdi_writeback *wb) percpu_counter_destroy(&wb->stat[i]); fprop_local_destroy_percpu(&wb->completions); + wb_congested_put(wb->congested); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -392,9 +401,6 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) struct bdi_writeback_congested *new_congested = NULL, *congested; struct rb_node **node, *parent; unsigned long flags; - - if (blkcg_id == 1) - return &bdi->wb_congested; retry: spin_lock_irqsave(&cgwb_lock, flags); @@ -419,7 +425,6 @@ retry: new_congested = NULL; rb_link_node(&congested->rb_node, parent, node); rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree); - atomic_inc(&bdi->usage_cnt); goto found; } @@ -450,24 +455,23 @@ found: */ void wb_congested_put(struct bdi_writeback_congested *congested) { - struct backing_dev_info *bdi = congested->bdi; unsigned long flags; - if (congested->blkcg_id == 1) - return; - local_irq_save(flags); if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { local_irq_restore(flags); return; } - rb_erase(&congested->rb_node, &congested->bdi->cgwb_congested_tree); + /* bdi might already have been destroyed leaving @congested unlinked */ + if (congested->bdi) { + rb_erase(&congested->rb_node, + &congested->bdi->cgwb_congested_tree); + congested->bdi = NULL; + } + spin_unlock_irqrestore(&cgwb_lock, flags); kfree(congested); - - if (atomic_dec_and_test(&bdi->usage_cnt)) - wake_up_all(&cgwb_release_wait); } static void cgwb_release_workfn(struct work_struct *work) @@ -480,7 +484,6 @@ static void cgwb_release_workfn(struct work_struct *work) css_put(wb->memcg_css); css_put(wb->blkcg_css); - wb_congested_put(wb->congested); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -541,7 +544,7 @@ static int cgwb_create(struct backing_dev_info *bdi, if (!wb) return -ENOMEM; - ret = wb_init(wb, bdi, gfp); + ret = wb_init(wb, bdi, blkcg_css->id, gfp); if (ret) goto err_free; @@ -553,12 +556,6 @@ static int cgwb_create(struct backing_dev_info *bdi, if (ret) goto err_ref_exit; - wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp); - if (!wb->congested) { - ret = -ENOMEM; - goto err_fprop_exit; - } - wb->memcg_css = memcg_css; wb->blkcg_css = blkcg_css; INIT_WORK(&wb->release_work, cgwb_release_workfn); @@ -588,12 +585,10 @@ static int cgwb_create(struct backing_dev_info *bdi, if (ret) { if (ret == -EEXIST) ret = 0; - goto err_put_congested; + goto err_fprop_exit; } goto out_put; -err_put_congested: - wb_congested_put(wb->congested); err_fprop_exit: fprop_local_destroy_percpu(&wb->memcg_completions); err_ref_exit: @@ -662,26 +657,41 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, return wb; } -static void cgwb_bdi_init(struct backing_dev_info *bdi) +static int cgwb_bdi_init(struct backing_dev_info *bdi) { - bdi->wb.memcg_css = mem_cgroup_root_css; - bdi->wb.blkcg_css = blkcg_root_css; - bdi->wb_congested.blkcg_id = 1; + int ret; + INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; atomic_set(&bdi->usage_cnt, 1); + + ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); + if (!ret) { + bdi->wb.memcg_css = mem_cgroup_root_css; + bdi->wb.blkcg_css = blkcg_root_css; + } + return ret; } static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; + struct bdi_writeback_congested *congested, *congested_n; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); + radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); + + rbtree_postorder_for_each_entry_safe(congested, congested_n, + &bdi->cgwb_congested_tree, rb_node) { + rb_erase(&congested->rb_node, &bdi->cgwb_congested_tree); + congested->bdi = NULL; /* mark @congested unlinked */ + } + spin_unlock_irq(&cgwb_lock); /* @@ -732,15 +742,28 @@ void wb_blkcg_offline(struct blkcg *blkcg) #else /* CONFIG_CGROUP_WRITEBACK */ -static void cgwb_bdi_init(struct backing_dev_info *bdi) { } +static int cgwb_bdi_init(struct backing_dev_info *bdi) +{ + int err; + + bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL); + if (!bdi->wb_congested) + return -ENOMEM; + + err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); + if (err) { + kfree(bdi->wb_congested); + return err; + } + return 0; +} + static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } #endif /* CONFIG_CGROUP_WRITEBACK */ int bdi_init(struct backing_dev_info *bdi) { - int err; - bdi->dev = NULL; bdi->min_ratio = 0; @@ -749,15 +772,7 @@ int bdi_init(struct backing_dev_info *bdi) INIT_LIST_HEAD(&bdi->bdi_list); init_waitqueue_head(&bdi->wb_waitq); - err = wb_init(&bdi->wb, bdi, GFP_KERNEL); - if (err) - return err; - - bdi->wb_congested.state = 0; - bdi->wb.congested = &bdi->wb_congested; - - cgwb_bdi_init(bdi); - return 0; + return cgwb_bdi_init(bdi); } EXPORT_SYMBOL(bdi_init); diff --git a/mm/filemap.c b/mm/filemap.c index 11f10efd637c..1283fc825458 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2563,7 +2563,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; diff --git a/mm/memory.c b/mm/memory.c index 11b9ca176740..388dcf9aa283 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2670,6 +2670,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? */ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3099,6 +3103,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3244,13 +3251,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); @@ -3726,7 +3732,7 @@ void print_vma_addr(char *prefix, unsigned long ip) if (buf) { char *p; - p = d_path(&f->f_path, buf, PAGE_SIZE); + p = file_path(f, buf, PAGE_SIZE); if (IS_ERR(p)) p = "?"; printk("%s%s[%lx+%lx]", prefix, kbasename(p), diff --git a/mm/swapfile.c b/mm/swapfile.c index a7e72103f23b..41e4581af7c5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2032,7 +2032,7 @@ static int swap_show(struct seq_file *swap, void *v) } file = si->swap_file; - len = seq_path(swap, &file->f_path, " \t\n\\"); + len = seq_file_path(swap, file, " \t\n\\"); seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? diff --git a/net/9p/client.c b/net/9p/client.c index 6f4c4c88db84..498454b3c06c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -843,7 +843,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (err < 0) { if (err == -EIO) c->status = Disconnected; - goto reterr; + if (err != -ERESTARTSYS) + goto reterr; } if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); @@ -1582,6 +1583,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) p9_free_req(clnt, req); break; } + if (rsize < count) { + pr_err("bogus RREAD count (%d > %d)\n", count, rsize); + count = rsize; + } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); if (!count) { @@ -1647,6 +1652,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) if (*err) { trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); + break; + } + if (rsize < count) { + pr_err("bogus RWRITE count (%d > %d)\n", count, rsize); + count = rsize; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 79e8f71aef5b..f30329f72641 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include <keys/ceph-type.h> #include <linux/module.h> #include <linux/mount.h> +#include <linux/nsproxy.h> #include <linux/parser.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -16,8 +17,6 @@ #include <linux/statfs.h> #include <linux/string.h> #include <linux/vmalloc.h> -#include <linux/nsproxy.h> -#include <net/net_namespace.h> #include <linux/ceph/ceph_features.h> @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -352,8 +355,8 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; - opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ - opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ + opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; + opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ @@ -439,13 +442,32 @@ ceph_parse_options(char *options, const char *dev_name, pr_warn("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: - opt->osd_keepalive_timeout = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osdkeepalive out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_keepalive_timeout = + msecs_to_jiffies(intval * 1000); break; case Opt_osd_idle_ttl: - opt->osd_idle_ttl = intval; + /* 0 isn't well defined right now, reject it */ + if (intval < 1 || intval > INT_MAX / 1000) { + pr_err("osd_idle_ttl out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000); break; case Opt_mount_timeout: - opt->mount_timeout = intval; + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("mount_timeout out of range\n"); + err = -EINVAL; + goto out; + } + opt->mount_timeout = msecs_to_jiffies(intval * 1000); break; case Opt_share: @@ -512,12 +534,14 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) seq_puts(m, "notcp_nodelay,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) - seq_printf(m, "mount_timeout=%d,", opt->mount_timeout); + seq_printf(m, "mount_timeout=%d,", + jiffies_to_msecs(opt->mount_timeout) / 1000); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) - seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl); + seq_printf(m, "osd_idle_ttl=%d,", + jiffies_to_msecs(opt->osd_idle_ttl) / 1000); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", - opt->osd_keepalive_timeout); + jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) @@ -587,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -600,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); @@ -626,8 +651,8 @@ static int have_mon_and_osd_map(struct ceph_client *client) */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - int err; - unsigned long timeout = client->options->mount_timeout * HZ; + unsigned long timeout = client->options->mount_timeout; + long err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); @@ -635,16 +660,15 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) return err; while (!have_mon_and_osd_map(client)) { - err = -EIO; if (timeout && time_after_eq(jiffies, started + timeout)) - return err; + return -ETIMEDOUT; /* wait */ dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, have_mon_and_osd_map(client) || (client->auth_err < 0), - timeout); - if (err == -EINTR || err == -ERESTARTSYS) + ceph_timeout_jiffies(timeout)); + if (err < 0) return err; if (client->auth_err < 0) return client->auth_err; @@ -721,5 +745,5 @@ module_exit(exit_ceph_lib); MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); -MODULE_DESCRIPTION("Ceph filesystem for Linux"); +MODULE_DESCRIPTION("Ceph core library"); MODULE_LICENSE("GPL"); diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 9d84ce4ea0df..80d7c3a97cb8 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -1,15 +1,11 @@ - #ifdef __KERNEL__ # include <linux/slab.h> +# include <linux/crush/crush.h> #else -# include <stdlib.h> -# include <assert.h> -# define kfree(x) do { if (x) free(x); } while (0) -# define BUG_ON(x) assert(!(x)) +# include "crush_compat.h" +# include "crush.h" #endif -#include <linux/crush/crush.h> - const char *crush_bucket_alg_name(int alg) { switch (alg) { @@ -134,6 +130,9 @@ void crush_destroy(struct crush_map *map) kfree(map->rules); } +#ifndef __KERNEL__ + kfree(map->choose_tries); +#endif kfree(map); } diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h index 6192c7fc958c..aae534c901a4 100644 --- a/net/ceph/crush/crush_ln_table.h +++ b/net/ceph/crush/crush_ln_table.h @@ -10,20 +10,20 @@ * */ -#if defined(__linux__) -#include <linux/types.h> -#elif defined(__FreeBSD__) -#include <sys/types.h> -#endif - #ifndef CEPH_CRUSH_LN_H #define CEPH_CRUSH_LN_H +#ifdef __KERNEL__ +# include <linux/types.h> +#else +# include "crush_compat.h" +#endif -// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) -// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) - -static int64_t __RH_LH_tbl[128*2+2] = { +/* + * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) + * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) + */ +static __s64 __RH_LH_tbl[128*2+2] = { 0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll, 0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all, 0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll, @@ -89,11 +89,12 @@ static int64_t __RH_LH_tbl[128*2+2] = { 0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll, 0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll, 0x0000800000000000ll, 0x0000ffff00000000ll, - }; - +}; - // LL_tbl[k] = 2^48*log2(1.0+k/2^15); -static int64_t __LL_tbl[256] = { +/* + * LL_tbl[k] = 2^48*log2(1.0+k/2^15) + */ +static __s64 __LL_tbl[256] = { 0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull, 0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull, 0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull, @@ -160,7 +161,4 @@ static int64_t __LL_tbl[256] = { 0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull, }; - - - #endif diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index 5bb63e37a8a1..ed123af49eba 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -1,6 +1,8 @@ - -#include <linux/types.h> -#include <linux/crush/hash.h> +#ifdef __KERNEL__ +# include <linux/crush/hash.h> +#else +# include "hash.h" +#endif /* * Robert Jenkins' function for mixing 32-bit values diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 5b47736d27d9..393bfb22d5bb 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -1,27 +1,31 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2015 Intel Corporation All Rights Reserved + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ #ifdef __KERNEL__ # include <linux/string.h> # include <linux/slab.h> # include <linux/bug.h> # include <linux/kernel.h> -# ifndef dprintk -# define dprintk(args...) -# endif +# include <linux/crush/crush.h> +# include <linux/crush/hash.h> #else -# include <string.h> -# include <stdio.h> -# include <stdlib.h> -# include <assert.h> -# define BUG_ON(x) assert(!(x)) -# define dprintk(args...) /* printf(args) */ -# define kmalloc(x, f) malloc(x) -# define kfree(x) free(x) +# include "crush_compat.h" +# include "crush.h" +# include "hash.h" #endif - -#include <linux/crush/crush.h> -#include <linux/crush/hash.h> #include "crush_ln_table.h" +#define dprintk(args...) /* printf(args) */ + /* * Implement the core CRUSH mapping algorithm. */ @@ -139,7 +143,7 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, int i; for (i = bucket->h.size-1; i >= 0; i--) { - __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], + __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i], r, bucket->h.id); w &= 0xffff; dprintk("list_choose i=%d x=%d r=%d item %d weight %x " @@ -238,43 +242,46 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, return bucket->h.items[high]; } -// compute 2^44*log2(input+1) -uint64_t crush_ln(unsigned xin) +/* compute 2^44*log2(input+1) */ +static __u64 crush_ln(unsigned int xin) { - unsigned x=xin, x1; - int iexpon, index1, index2; - uint64_t RH, LH, LL, xl64, result; + unsigned int x = xin, x1; + int iexpon, index1, index2; + __u64 RH, LH, LL, xl64, result; - x++; + x++; - // normalize input - iexpon = 15; - while(!(x&0x18000)) { x<<=1; iexpon--; } + /* normalize input */ + iexpon = 15; + while (!(x & 0x18000)) { + x <<= 1; + iexpon--; + } - index1 = (x>>8)<<1; - // RH ~ 2^56/index1 - RH = __RH_LH_tbl[index1 - 256]; - // LH ~ 2^48 * log2(index1/256) - LH = __RH_LH_tbl[index1 + 1 - 256]; + index1 = (x >> 8) << 1; + /* RH ~ 2^56/index1 */ + RH = __RH_LH_tbl[index1 - 256]; + /* LH ~ 2^48 * log2(index1/256) */ + LH = __RH_LH_tbl[index1 + 1 - 256]; - // RH*x ~ 2^48 * (2^15 + xf), xf<2^8 - xl64 = (int64_t)x * RH; - xl64 >>= 48; - x1 = xl64; + /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */ + xl64 = (__s64)x * RH; + xl64 >>= 48; + x1 = xl64; - result = iexpon; - result <<= (12 + 32); + result = iexpon; + result <<= (12 + 32); - index2 = x1 & 0xff; - // LL ~ 2^48*log2(1.0+index2/2^15) - LL = __LL_tbl[index2]; + index2 = x1 & 0xff; + /* LL ~ 2^48*log2(1.0+index2/2^15) */ + LL = __LL_tbl[index2]; - LH = LH + LL; + LH = LH + LL; - LH >>= (48-12 - 32); - result += LH; + LH >>= (48 - 12 - 32); + result += LH; - return result; + return result; } @@ -290,9 +297,9 @@ uint64_t crush_ln(unsigned xin) static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, int x, int r) { - unsigned i, high = 0; - unsigned u; - unsigned w; + unsigned int i, high = 0; + unsigned int u; + unsigned int w; __s64 ln, draw, high_draw = 0; for (i = 0; i < bucket->h.size; i++) { @@ -567,6 +574,10 @@ reject: out[outpos] = item; outpos++; count--; +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif } dprintk("CHOOSE returns %d\n", outpos); @@ -610,6 +621,20 @@ static void crush_choose_indep(const struct crush_map *map, } for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) { +#ifdef DEBUG_INDEP + if (out2 && ftotal) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif for (rep = outpos; rep < endpos; rep++) { if (out[rep] != CRUSH_ITEM_UNDEF) continue; @@ -726,6 +751,24 @@ static void crush_choose_indep(const struct crush_map *map, out2[rep] = CRUSH_ITEM_NONE; } } +#ifndef __KERNEL__ + if (map->choose_tries && ftotal <= map->choose_total_tries) + map->choose_tries[ftotal]++; +#endif +#ifdef DEBUG_INDEP + if (out2) { + dprintk("%u %d a: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out[rep]); + } + dprintk("\n"); + dprintk("%u %d b: ", ftotal, left); + for (rep = outpos; rep < endpos; rep++) { + dprintk(" %d", out2[rep]); + } + dprintk("\n"); + } +#endif } /** @@ -790,8 +833,15 @@ int crush_do_rule(const struct crush_map *map, switch (curstep->op) { case CRUSH_RULE_TAKE: - w[0] = curstep->arg1; - wsize = 1; + if ((curstep->arg1 >= 0 && + curstep->arg1 < map->max_devices) || + (-1-curstep->arg1 < map->max_buckets && + map->buckets[-1-curstep->arg1])) { + w[0] = curstep->arg1; + wsize = 1; + } else { + dprintk(" bad take value %d\n", curstep->arg1); + } break; case CRUSH_RULE_SET_CHOOSE_TRIES: @@ -877,7 +927,7 @@ int crush_do_rule(const struct crush_map *map, 0); } else { out_size = ((numrep < (result_max-osize)) ? - numrep : (result_max-osize)); + numrep : (result_max-osize)); crush_choose_indep( map, map->buckets[-1-w[i]], @@ -923,5 +973,3 @@ int crush_do_rule(const struct crush_map *map, } return result_len; } - - diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 073262fea6dd..e3be1d22a247 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include <linux/inet.h> #include <linux/kthread.h> #include <linux/net.h> +#include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> @@ -278,7 +279,6 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); BUG_ON(zero_page == NULL); - kunmap(zero_page); page_cache_release(zero_page); zero_page = NULL; } @@ -480,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -1545,7 +1545,7 @@ static int write_partial_message_data(struct ceph_connection *con) page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, &last_piece); ret = ceph_tcp_sendpage(con->sock, page, page_offset, - length, last_piece); + length, !last_piece); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); @@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) @@ -2945,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2b3cf05e87b0..9d6ff1215928 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -298,21 +298,28 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) } EXPORT_SYMBOL(ceph_monc_request_next_osdmap); +/* + * Wait for an osdmap with a given epoch. + * + * @epoch: epoch to wait for + * @timeout: in jiffies, 0 means "wait forever" + */ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout) { unsigned long started = jiffies; - int ret; + long ret; mutex_lock(&monc->mutex); while (monc->have_osdmap < epoch) { mutex_unlock(&monc->mutex); - if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + if (timeout && time_after_eq(jiffies, started + timeout)) return -ETIMEDOUT; ret = wait_event_interruptible_timeout(monc->client->auth_wq, - monc->have_osdmap >= epoch, timeout); + monc->have_osdmap >= epoch, + ceph_timeout_jiffies(timeout)); if (ret < 0) return ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index c4ec9239249a..50033677c0fa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -296,6 +296,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_CMPXATTR: ceph_osd_data_release(&op->xattr.osd_data); break; + case CEPH_OSD_OP_STAT: + ceph_osd_data_release(&op->raw_data_in); + break; default: break; } @@ -450,7 +453,7 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE) */ static struct ceph_osd_req_op * _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, - u16 opcode) + u16 opcode, u32 flags) { struct ceph_osd_req_op *op; @@ -460,14 +463,15 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, op = &osd_req->r_ops[which]; memset(op, 0, sizeof (*op)); op->op = opcode; + op->flags = flags; return op; } void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode) + unsigned int which, u16 opcode, u32 flags) { - (void)_osd_req_op_init(osd_req, which, opcode); + (void)_osd_req_op_init(osd_req, which, opcode, flags); } EXPORT_SYMBOL(osd_req_op_init); @@ -476,7 +480,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && @@ -515,7 +520,8 @@ EXPORT_SYMBOL(osd_req_op_extent_update); void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *class, const char *method) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; @@ -552,7 +558,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; @@ -585,7 +592,8 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + opcode, 0); BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); @@ -602,7 +610,8 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_write_size) { struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - CEPH_OSD_OP_SETALLOCHINT); + CEPH_OSD_OP_SETALLOCHINT, + 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; @@ -786,7 +795,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) { - osd_req_op_init(req, which, opcode); + osd_req_op_init(req, which, opcode, 0); } else { u32 object_size = le32_to_cpu(layout->fl_object_size); u32 object_base = off - objoff; @@ -1088,7 +1097,7 @@ static void __move_osd_to_lru(struct ceph_osd_client *osdc, BUG_ON(!list_empty(&osd->o_osd_lru)); list_add_tail(&osd->o_osd_lru, &osdc->osd_lru); - osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ; + osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl; } static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc, @@ -1199,7 +1208,7 @@ static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) static void __schedule_osd_timeout(struct ceph_osd_client *osdc) { schedule_delayed_work(&osdc->timeout_work, - osdc->client->options->osd_keepalive_timeout * HZ); + osdc->client->options->osd_keepalive_timeout); } static void __cancel_osd_timeout(struct ceph_osd_client *osdc) @@ -1567,10 +1576,9 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); + struct ceph_options *opts = osdc->client->options; struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long keepalive = - osdc->client->options->osd_keepalive_timeout * HZ; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1586,7 +1594,8 @@ static void handle_timeout(struct work_struct *work) */ INIT_LIST_HEAD(&slow_osds); list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { - if (time_before(jiffies, req->r_stamp + keepalive)) + if (time_before(jiffies, + req->r_stamp + opts->osd_keepalive_timeout)) break; osd = req->r_osd; @@ -1613,8 +1622,7 @@ static void handle_osds_timeout(struct work_struct *work) struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, osds_timeout_work.work); - unsigned long delay = - osdc->client->options->osd_idle_ttl * HZ >> 2; + unsigned long delay = osdc->client->options->osd_idle_ttl / 4; dout("osds timeout\n"); down_read(&osdc->map_sem); @@ -2619,7 +2627,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osdc->event_count = 0; schedule_delayed_work(&osdc->osds_timeout_work, - round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); + round_jiffies_relative(osdc->client->options->osd_idle_ttl)); err = -ENOMEM; osdc->req_mempool = mempool_create_kmalloc_pool(10, diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 15796696d64e..4a3125836b64 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -89,7 +89,7 @@ static int crush_decode_tree_bucket(void **p, void *end, { int j; dout("crush_decode_tree_bucket %p to %p\n", *p, end); - ceph_decode_32_safe(p, end, b->num_nodes, bad); + ceph_decode_8_safe(p, end, b->num_nodes, bad); b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); if (b->node_weights == NULL) return -ENOMEM; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 096d91447e06..d4f5f220a8e5 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -51,10 +51,7 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); } - if (is_vmalloc_addr(pages)) - vfree(pages); - else - kfree(pages); + kvfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 936ad0a15371..b512fbd9d79a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o -sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463..9825ff0f91d6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count > 0; + return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); } static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_add(n, &xprt->bc_free_slots); xprt->bc_alloc_count += n; } static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) { + atomic_sub(n, &xprt->bc_free_slots); return xprt->bc_alloc_count -= n; } @@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req) dprintk("RPC: free allocations for req= %p\n", req); WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); - xbufp = &req->rq_private_buf; + xbufp = &req->rq_rcv_buf; free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); kfree(req); } +static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) +{ + struct page *page; + /* Preallocate one XDR receive buffer */ + page = alloc_page(gfp_flags); + if (page == NULL) + return -ENOMEM; + buf->head[0].iov_base = page_address(page); + buf->head[0].iov_len = PAGE_SIZE; + buf->tail[0].iov_base = NULL; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = PAGE_SIZE; + return 0; +} + +static +struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req; + + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(*req), gfp_flags); + if (req == NULL) + return NULL; + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + req->rq_rcv_buf.len = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + return req; +out_free: + xprt_free_allocation(req); + return NULL; +} + /* * Preallocate up to min_reqs structures and related buffers for use * by the backchannel. This function can be called multiple times @@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { - struct page *page_rcv = NULL, *page_snd = NULL; - struct xdr_buf *xbufp = NULL; - struct rpc_rqst *req, *tmp; + struct rpc_rqst *req; struct list_head tmp_list; int i; @@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) INIT_LIST_HEAD(&tmp_list); for (i = 0; i < min_reqs; i++) { /* Pre-allocate one backchannel rpc_rqst */ - req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + req = xprt_alloc_bc_req(xprt, GFP_KERNEL); if (req == NULL) { printk(KERN_ERR "Failed to create bc rpc_rqst\n"); goto out_free; @@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) /* Add the allocated buffer to the tmp list */ dprintk("RPC: adding req= %p\n", req); list_add(&req->rq_bc_pa_list, &tmp_list); - - req->rq_xprt = xprt; - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_bc_list); - - /* Preallocate one XDR receive buffer */ - page_rcv = alloc_page(GFP_KERNEL); - if (page_rcv == NULL) { - printk(KERN_ERR "Failed to create bc receive xbuf\n"); - goto out_free; - } - xbufp = &req->rq_rcv_buf; - xbufp->head[0].iov_base = page_address(page_rcv); - xbufp->head[0].iov_len = PAGE_SIZE; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = PAGE_SIZE; - xbufp->buflen = PAGE_SIZE; - - /* Preallocate one XDR send buffer */ - page_snd = alloc_page(GFP_KERNEL); - if (page_snd == NULL) { - printk(KERN_ERR "Failed to create bc snd xbuf\n"); - goto out_free; - } - - xbufp = &req->rq_snd_buf; - xbufp->head[0].iov_base = page_address(page_snd); - xbufp->head[0].iov_len = 0; - xbufp->tail[0].iov_base = NULL; - xbufp->tail[0].iov_len = 0; - xbufp->page_len = 0; - xbufp->len = 0; - xbufp->buflen = PAGE_SIZE; } /* @@ -167,7 +181,10 @@ out_free: /* * Memory allocation failed, free the temporary list */ - list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { + while (!list_empty(&tmp_list)) { + req = list_first_entry(&tmp_list, + struct rpc_rqst, + rq_bc_pa_list); list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); } @@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (list_empty(&xprt->bc_pa_list)) + if (atomic_read(&xprt->bc_free_slots) <= 0) goto not_found; - + if (list_empty(&xprt->bc_pa_list)) { + req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); + if (!req) + goto not_found; + /* Note: this 'free' request adds it to xprt->bc_pa_list */ + xprt_free_bc_request(req); + } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); req->rq_reply_bytes_recvd = 0; @@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req) req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_atomic(); - WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); smp_mb__after_atomic(); - if (!xprt_need_to_requeue(xprt)) { + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + if (xprt_need_to_requeue(xprt)) { + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + if (req != NULL) { /* * The last remaining session was destroyed while this * entry was in use. Free the entry and don't attempt @@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req) xprt_free_allocation(req); return; } - - /* - * Return it to the list of preallocations so that it - * may be reused by a new callback request. - */ - spin_lock_bh(&xprt->bc_pa_lock); - list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); - spin_unlock_bh(&xprt->bc_pa_lock); } /* @@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c deleted file mode 100644 index 15c7a8a1c24f..000000000000 --- a/net/sunrpc/bc_svc.c +++ /dev/null @@ -1,63 +0,0 @@ -/****************************************************************************** - -(c) 2007 Network Appliance, Inc. All Rights Reserved. -(c) 2009 NetApp. All Rights Reserved. - -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ - -/* - * The NFSv4.1 callback service helper routines. - * They implement the transport level processing required to send the - * reply over an existing open connection previously established by the client. - */ - -#include <linux/module.h> - -#include <linux/sunrpc/xprt.h> -#include <linux/sunrpc/sched.h> -#include <linux/sunrpc/bc_xprt.h> - -#define RPCDBG_FACILITY RPCDBG_SVCDSP - -/* Empty callback ops */ -static const struct rpc_call_ops nfs41_callback_ops = { -}; - - -/* - * Send the callback reply - */ -int bc_send(struct rpc_rqst *req) -{ - struct rpc_task *task; - int ret; - - dprintk("RPC: bc_send req= %p\n", req); - task = rpc_run_bc_task(req, &nfs41_callback_ops); - if (IS_ERR(task)) - ret = PTR_ERR(task); - else { - WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - ret = task->tk_status; - rpc_put_task(task); - } - dprintk("RPC: bc_send ret= %d\n", ret); - return ret; -} - diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e6ce1517367f..cbc6af923dd1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (sk_memalloc_socks()) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - if (xprt->swapper) - task->tk_flags |= RPC_TASK_SWAPPER; - rcu_read_unlock(); - } + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async); * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it * @req: RPC request - * @tk_ops: RPC call ops */ -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; struct rpc_task_setup task_setup_data = { - .callback_ops = tk_ops, + .callback_ops = &rpc_default_ops, + .flags = RPC_TASK_SOFTCONN, }; dprintk("RPC: rpc_run_bc_task req= %p\n", req); @@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task) req->rq_callsize + req->rq_rcvsize); if (req->rq_buffer != NULL) return; + xprt_inject_disconnect(xprt); dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt_prepare_transmit(task)) { - /* - * Could not reserve the transport. Try again after the - * transport is released. - */ - task->tk_status = 0; - task->tk_action = call_bc_transmit; - return; - } + if (!xprt_prepare_transmit(task)) + goto out_retry; - task->tk_action = rpc_exit_task; if (task->tk_status < 0) { printk(KERN_NOTICE "RPC: Could not send backchannel reply " "error: %d\n", task->tk_status); - return; + goto out_done; } + if (req->rq_connect_cookie != req->rq_xprt->connect_cookie) + req->rq_bytes_sent = 0; xprt_transmit(task); + + if (task->tk_status == -EAGAIN) + goto out_nospace; + xprt_end_transmit(task); dprint_status(task); switch (task->tk_status) { case 0: /* Success */ - break; case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + case -ECONNRESET: + case -ECONNREFUSED: + case -EADDRINUSE: + case -ENOTCONN: + case -EPIPE: + break; case -ETIMEDOUT: /* * Problem reaching the server. Disconnect and let the @@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task) break; } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +out_done: + task->tk_action = rpc_exit_task; + return; +out_nospace: + req->rq_connect_cookie = req->rq_xprt->connect_cookie; +out_retry: + task->tk_status = 0; } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ @@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net) spin_unlock(&sn->rpc_client_lock); } #endif + +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + int ret = 0; + struct rpc_xprt *xprt; + + if (atomic_inc_return(&clnt->cl_swapper) == 1) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + ret = xprt_enable_swap(xprt); + xprt_put(xprt); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); + +void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + xprt_disable_swap(xprt); + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); +#endif /* CONFIG_SUNRPC_SWAP */ diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..e7b4d93566df 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -10,9 +10,12 @@ #include "netns.h" static struct dentry *topdir; +static struct dentry *rpc_fault_dir; static struct dentry *rpc_clnt_dir; static struct dentry *rpc_xprt_dir; +unsigned int rpc_inject_disconnect; + struct rpc_clnt_iter { struct rpc_clnt *clnt; loff_t pos; @@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; } + + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); } void @@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) xprt->debugfs = NULL; } +static int +fault_open(struct inode *inode, struct file *filp) +{ + filp->private_data = kmalloc(128, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int +fault_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static ssize_t +fault_disconnect_read(struct file *filp, char __user *user_buf, + size_t len, loff_t *offset) +{ + char *buffer = (char *)filp->private_data; + size_t size; + + size = sprintf(buffer, "%u\n", rpc_inject_disconnect); + return simple_read_from_buffer(user_buf, len, offset, buffer, size); +} + +static ssize_t +fault_disconnect_write(struct file *filp, const char __user *user_buf, + size_t len, loff_t *offset) +{ + char buffer[16]; + + if (len >= sizeof(buffer)) + len = sizeof(buffer) - 1; + if (copy_from_user(buffer, user_buf, len)) + return -EFAULT; + buffer[len] = '\0'; + if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) + return -EINVAL; + return len; +} + +static const struct file_operations fault_disconnect_fops = { + .owner = THIS_MODULE, + .open = fault_open, + .read = fault_disconnect_read, + .write = fault_disconnect_write, + .release = fault_release, +}; + +static struct dentry * +inject_fault_dir(struct dentry *topdir) +{ + struct dentry *faultdir; + + faultdir = debugfs_create_dir("inject_fault", topdir); + if (!faultdir) + return NULL; + + if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + NULL, &fault_disconnect_fops)) + return NULL; + + return faultdir; +} + void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; rpc_xprt_dir = NULL; } @@ -282,6 +355,10 @@ sunrpc_debugfs_init(void) if (!topdir) return; + rpc_fault_dir = inject_fault_dir(topdir); + if (!rpc_fault_dir) + goto out_remove; + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) goto out_remove; @@ -294,5 +371,6 @@ sunrpc_debugfs_init(void) out_remove: debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 852ae606b02a..5a16d8d8c831 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; + struct rpc_task *task; + int proc_error; + int error; + + dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xprt = serv->sv_bc_xprt; @@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* * Skip the next two words because they've already been - * processed in the trasport + * processed in the transport */ svc_getu32(argv); /* XID */ svc_getnl(argv); /* CALLDIR */ - /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) { - memcpy(&req->rq_snd_buf, &rqstp->rq_res, - sizeof(req->rq_snd_buf)); - return bc_send(req); - } else { - /* drop request */ + /* Parse and execute the bc call */ + proc_error = svc_process_common(rqstp, argv, resv); + + atomic_inc(&req->rq_xprt->bc_free_slots); + if (!proc_error) { + /* Processing error: drop the request */ xprt_free_bc_request(req); return 0; } + + /* Finally, send the reply synchronously */ + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + task = rpc_run_bc_task(req); + if (IS_ERR(task)) { + error = PTR_ERR(task); + goto out; + } + + WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); + error = task->tk_status; + rpc_put_task(task); + +out: + dprintk("svc: %s(), error=%d\n", __func__, error); + return error; } EXPORT_SYMBOL_GPL(bc_svc_process); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..ab5dd621ae0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); +static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); static void xprt_destroy(struct rpc_xprt *xprt); static DEFINE_SPINLOCK(xprt_list_lock); @@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } xprt_clear_locked(xprt); out_sleep: + if (req) + __xprt_put_cong(xprt, req); dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; @@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); - xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } @@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; @@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 302d4ebf6fbf..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -11,6 +11,21 @@ * can take tens of usecs to complete. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using the + * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is + * finished, the Memory Region is unmapped using the ib_unmap_fmr + * verb (fmr_op_unmap). + */ + +/* Transport recovery + * + * After a transport reconnect, fmr_op_map re-uses the MR already + * allocated for the RPC, but generates a fresh rkey then maps the + * MR again. This process is synchronous. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *r; int i, rc; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); + rc = -ENOMEM; while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); if (!r) - return -ENOMEM; + goto out; - r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); - if (IS_ERR(r->r.fmr)) + r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * + sizeof(u64), GFP_KERNEL); + if (!r->r.fmr.physaddrs) + goto out_free; + + r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); + if (IS_ERR(r->r.fmr.fmr)) goto out_fmr_err; list_add(&r->mw_list, &buf->rb_mws); @@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) return 0; out_fmr_err: - rc = PTR_ERR(r->r.fmr); + rc = PTR_ERR(r->r.fmr.fmr); dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); + kfree(r->r.fmr.physaddrs); +out_free: kfree(r); +out: return rc; } +static int +__fmr_unmap(struct rpcrdma_mw *r) +{ + LIST_HEAD(l); + + list_add(&r->r.fmr.fmr->list, &l); + return ib_unmap_fmr(&l); +} + /* Use the ib_map_phys_fmr() verb to register a memory region * for remote access via RDMA READ or RDMA WRITE. */ @@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; int len, pageoff, i, rc; + struct rpcrdma_mw *mw; + + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + if (!mw) { + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } else { + /* this is a retransmit; generate a fresh rkey */ + rc = __fmr_unmap(mw); + if (rc) + return rc; + } pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, nsegs = RPCRDMA_MAX_FMR_SGES; for (i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); - physaddrs[i] = seg->mr_dma; + mw->r.fmr.physaddrs[i] = seg->mr_dma; len += seg->mr_len; ++seg; ++i; @@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, break; } - rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, + i, seg1->mr_dma); if (rc) goto out_maperr; - seg1->mr_rkey = mw->r.fmr->rkey; + seg1->rl_mw = mw; + seg1->mr_rkey = mw->r.fmr.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mr_seg *seg1 = seg; - struct ib_device *device; + struct rpcrdma_mw *mw = seg1->rl_mw; int rc, nsegs = seg->mr_nsegs; - LIST_HEAD(l); - list_add(&seg1->rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; + dprintk("RPC: %s: FMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); - read_unlock(&ia->ri_qplock); + rpcrdma_unmap_one(ia->ri_device, seg++); + rc = __fmr_unmap(mw); if (rc) goto out_err; + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: + /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy + * will attempt to release it when the transport is destroyed. + */ dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); return nsegs; } -/* After a disconnect, unmap all FMRs. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_fmr_external(). - */ -static void -fmr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_mw *r; - LIST_HEAD(list); - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) - list_add(&r->r.fmr->list, &list); - - rc = ib_unmap_fmr(&list); - if (rc) - dprintk("RPC: %s: ib_unmap_fmr failed %i\n", - __func__, rc); -} - static void fmr_op_destroy(struct rpcrdma_buffer *buf) { @@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); - rc = ib_dealloc_fmr(r->r.fmr); + kfree(r->r.fmr.physaddrs); + + rc = ib_dealloc_fmr(r->r.fmr.fmr); if (rc) dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", __func__, rc); + kfree(r); } } @@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_open = fmr_op_open, .ro_maxpages = fmr_op_maxpages, .ro_init = fmr_op_init, - .ro_reset = fmr_op_reset, .ro_destroy = fmr_op_destroy, .ro_displayname = "fmr", }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index d234521320a4..04ea914201b2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -11,12 +11,136 @@ * but most complex memory registration mode. */ +/* Normal operation + * + * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG + * Work Request (frmr_op_map). When the RDMA operation is finished, this + * Memory Region is invalidated using a LOCAL_INV Work Request + * (frmr_op_unmap). + * + * Typically these Work Requests are not signaled, and neither are RDMA + * SEND Work Requests (with the exception of signaling occasionally to + * prevent provider work queue overflows). This greatly reduces HCA + * interrupt workload. + * + * As an optimization, frwr_op_unmap marks MRs INVALID before the + * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on + * rb_mws immediately so that no work (like managing a linked list + * under a spinlock) is needed in the completion upcall. + * + * But this means that frwr_op_map() can occasionally encounter an MR + * that is INVALID but the LOCAL_INV WR has not completed. Work Queue + * ordering prevents a subsequent FAST_REG WR from executing against + * that MR while it is still being invalidated. + */ + +/* Transport recovery + * + * ->op_map and the transport connect worker cannot run at the same + * time, but ->op_unmap can fire while the transport connect worker + * is running. Thus MR recovery is handled in ->op_map, to guarantee + * that recovered MRs are owned by a sending RPC, and not one where + * ->op_unmap could fire at the same time transport reconnect is + * being done. + * + * When the underlying transport disconnects, MRs are left in one of + * three states: + * + * INVALID: The MR was not in use before the QP entered ERROR state. + * (Or, the LOCAL_INV WR has not completed or flushed yet). + * + * STALE: The MR was being registered or unregistered when the QP + * entered ERROR state, and the pending WR was flushed. + * + * VALID: The MR was registered before the QP entered ERROR state. + * + * When frwr_op_map encounters STALE and VALID MRs, they are recovered + * with ib_dereg_mr and then are re-initialized. Beause MR recovery + * allocates fresh resources, it is deferred to a workqueue, and the + * recovered MRs are placed back on the rb_mws list when recovery is + * complete. frwr_op_map allocates another MR for the current RPC while + * the broken MR is reset. + * + * To ensure that frwr_op_map doesn't encounter an MR that is marked + * INVALID but that is about to be flushed due to a previous transport + * disconnect, the transport connect worker attempts to drain all + * pending send queue WRs before the transport is reconnected. + */ + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif +static struct workqueue_struct *frwr_recovery_wq; + +#define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM) + +int +frwr_alloc_recovery_wq(void) +{ + frwr_recovery_wq = alloc_workqueue("frwr_recovery", + FRWR_RECOVERY_WQ_FLAGS, 0); + return !frwr_recovery_wq ? -ENOMEM : 0; +} + +void +frwr_destroy_recovery_wq(void) +{ + struct workqueue_struct *wq; + + if (!frwr_recovery_wq) + return; + + wq = frwr_recovery_wq; + frwr_recovery_wq = NULL; + destroy_workqueue(wq); +} + +/* Deferred reset of a single FRMR. Generate a fresh rkey by + * replacing the MR. + * + * There's no recovery if this fails. The FRMR is abandoned, but + * remains in rb_all. It will be cleaned up when the transport is + * destroyed. + */ +static void +__frwr_recovery_worker(struct work_struct *work) +{ + struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, + r.frmr.fr_work); + struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; + unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; + struct ib_pd *pd = r_xprt->rx_ia.ri_pd; + + if (ib_dereg_mr(r->r.frmr.fr_mr)) + goto out_fail; + + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); + if (IS_ERR(r->r.frmr.fr_mr)) + goto out_fail; + + dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); + r->r.frmr.fr_state = FRMR_IS_INVALID; + rpcrdma_put_mw(r_xprt, r); + return; + +out_fail: + pr_warn("RPC: %s: FRMR %p unrecovered\n", + __func__, r); +} + +/* A broken MR was discovered in a context that can't sleep. + * Defer recovery to the recovery worker. + */ +static void +__frwr_queue_recovery(struct rpcrdma_mw *r) +{ + INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); + queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); +} + static int __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, unsigned int depth) @@ -128,7 +252,7 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: frmr %p (stale), status %s (%d)\n", + pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", __func__, r, ib_wc_status_msg(wc->status), wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } @@ -137,16 +261,19 @@ static int frwr_op_init(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; + struct ib_device *device = r_xprt->rx_ia.ri_device; unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; struct ib_pd *pd = r_xprt->rx_ia.ri_pd; int i; + spin_lock_init(&buf->rb_mwlock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); + i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1); + i += 2; /* head + tail */ + i *= buf->rb_max_requests; /* one set for each RPC slot */ + dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); while (i--) { struct rpcrdma_mw *r; @@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_all, &buf->rb_all); r->mw_sendcompletion = frwr_sendcompletion; + r->r.frmr.fr_xprt = r_xprt; } return 0; @@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct ib_device *device = ia->ri_id->device; + struct ib_device *device = ia->ri_device; enum dma_data_direction direction = rpcrdma_data_dir(writing); struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->rl_mw; - struct rpcrdma_frmr *frmr = &mw->r.frmr; - struct ib_mr *mr = frmr->fr_mr; + struct rpcrdma_mw *mw; + struct rpcrdma_frmr *frmr; + struct ib_mr *mr; struct ib_send_wr fastreg_wr, *bad_wr; u8 key; int len, pageoff; @@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, u64 pa; int page_no; + mw = seg1->rl_mw; + seg1->rl_mw = NULL; + do { + if (mw) + __frwr_queue_recovery(mw); + mw = rpcrdma_get_mw(r_xprt); + if (!mw) + return -ENOMEM; + } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); + frmr = &mw->r.frmr; + frmr->fr_state = FRMR_IS_VALID; + pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_len += pageoff; len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; + for (page_no = i = 0; i < nsegs;) { rpcrdma_map_one(device, seg, direction); pa = seg->mr_dma; @@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", __func__, mw, i, len); - frmr->fr_state = FRMR_IS_VALID; - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); fastreg_wr.wr_id = (unsigned long)(void *)mw; fastreg_wr.opcode = IB_WR_FAST_REG_MR; @@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, fastreg_wr.wr.fast_reg.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; + mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); fastreg_wr.wr.fast_reg.rkey = mr->rkey; @@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (rc) goto out_senderr; + seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; @@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - ib_update_fast_reg_key(mr, --key); - frmr->fr_state = FRMR_IS_INVALID; while (i--) rpcrdma_unmap_one(device, --seg); + __frwr_queue_recovery(mw); return rc; } @@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_mw *mw = seg1->rl_mw; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; - struct ib_device *device; - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + dprintk("RPC: %s: FRMR %p\n", __func__, mw); + + seg1->rl_mw = NULL; + mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - read_lock(&ia->ri_qplock); - device = ia->ri_id->device; while (seg1->mr_nsegs--) - rpcrdma_unmap_one(device, seg++); + rpcrdma_unmap_one(ia->ri_device, seg++); + read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); if (rc) goto out_err; + + rpcrdma_put_mw(r_xprt, mw); return nsegs; out_err: - /* Force rpcrdma_buffer_get() to retry */ - seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); + __frwr_queue_recovery(mw); return nsegs; } -/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in - * an unusable state. Find FRMRs in this state and dereg / reg - * each. FRMRs that are VALID and attached to an rpcrdma_req are - * also torn down. - * - * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. - * - * This is invoked only in the transport connect worker in order - * to serialize with rpcrdma_register_frmr_external(). - */ -static void -frwr_op_reset(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct ib_device *device = r_xprt->rx_ia.ri_id->device; - unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; - struct ib_pd *pd = r_xprt->rx_ia.ri_pd; - struct rpcrdma_mw *r; - int rc; - - list_for_each_entry(r, &buf->rb_all, mw_all) { - if (r->r.frmr.fr_state == FRMR_IS_INVALID) - continue; - - __frwr_release(r); - rc = __frwr_init(r, pd, device, depth); - if (rc) { - dprintk("RPC: %s: mw %p left %s\n", - __func__, r, - (r->r.frmr.fr_state == FRMR_IS_STALE ? - "stale" : "valid")); - continue; - } - - r->r.frmr.fr_state = FRMR_IS_INVALID; - } -} - static void frwr_op_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_mw *r; + /* Ensure stale MWs for "buf" are no longer in flight */ + flush_workqueue(frwr_recovery_wq); + while (!list_empty(&buf->rb_all)) { r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); list_del(&r->mw_all); @@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_open = frwr_op_open, .ro_maxpages = frwr_op_maxpages, .ro_init = frwr_op_init, - .ro_reset = frwr_op_reset, .ro_destroy = frwr_op_destroy, .ro_displayname = "frwr", }; diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index ba518af16787..41985d07fdb7 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - rpcrdma_map_one(ia->ri_id->device, seg, - rpcrdma_data_dir(writing)); + rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); seg->mr_rkey = ia->ri_bind_mem->rkey; seg->mr_base = seg->mr_dma; seg->mr_nsegs = 1; @@ -65,19 +64,11 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - read_lock(&ia->ri_qplock); - rpcrdma_unmap_one(ia->ri_id->device, seg); - read_unlock(&ia->ri_qplock); - + rpcrdma_unmap_one(ia->ri_device, seg); return 1; } static void -physical_op_reset(struct rpcrdma_xprt *r_xprt) -{ -} - -static void physical_op_destroy(struct rpcrdma_buffer *buf) { } @@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { .ro_open = physical_op_open, .ro_maxpages = physical_op_maxpages, .ro_init = physical_op_init, - .ro_reset = physical_op_reset, .ro_destroy = physical_op_destroy, .ro_displayname = "physical", }; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2c53ea9e1b83..84ea37daef36 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, return (unsigned char *)iptr - (unsigned char *)headerp; out: - if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) - return n; - for (pos = 0; nchunks--;) pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, &req->rl_segments[pos]); @@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpcrdma_msg *headerp; struct rpcrdma_req *req; struct rpc_rqst *rqst; - struct rpc_xprt *xprt = rep->rr_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; __be32 *iptr; int rdmalen, status; unsigned long cwnd; @@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) rep->rr_len); repost: r_xprt->rx_stats.bad_reply_count++; - rep->rr_func = rpcrdma_reply_handler; if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) rpcrdma_recv_buffer_put(rep); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 436da2caec95..680f888a9ddd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -240,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connecting(xprt); } +static void +xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, + rx_xprt); + + pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + rdma_disconnect(r_xprt->rx_ia.ri_id); +} + /* * xprt_rdma_destroy * @@ -612,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); - if (req->rl_reply) { - req->rl_reply->rr_func = rpcrdma_reply_handler; - /* this need only be done once, but... */ - req->rl_reply->rr_xprt = xprt; - } - /* Must suppress retransmit to maintain credits */ if (req->rl_connect_cookie == xprt->connect_cookie) goto drop_connection; @@ -676,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count); } +static int +xprt_rdma_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xprt_rdma_disable_swap(struct rpc_xprt *xprt) +{ +} + /* * Plumbing for rpc transport switch and kernel module */ @@ -694,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats + .print_stats = xprt_rdma_print_stats, + .enable_swap = xprt_rdma_enable_swap, + .disable_swap = xprt_rdma_disable_swap, + .inject_disconnect = xprt_rdma_inject_disconnect }; static struct xprt_class xprt_rdma = { @@ -720,17 +738,24 @@ void xprt_rdma_cleanup(void) if (rc) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); + + frwr_destroy_recovery_wq(); } int xprt_rdma_init(void) { int rc; - rc = xprt_register_transport(&xprt_rdma); - + rc = frwr_alloc_recovery_wq(); if (rc) return rc; + rc = xprt_register_transport(&xprt_rdma); + if (rc) { + frwr_destroy_recovery_wq(); + return rc; + } + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); dprintk("Defaults:\n"); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 52df265b472a..891c4ede2c20 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -80,7 +80,6 @@ static void rpcrdma_run_tasklet(unsigned long data) { struct rpcrdma_rep *rep; - void (*func)(struct rpcrdma_rep *); unsigned long flags; data = data; @@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data) rep = list_entry(rpcrdma_tasklets_g.next, struct rpcrdma_rep, rr_list); list_del(&rep->rr_list); - func = rep->rr_func; - rep->rr_func = NULL; spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - if (func) - func(rep); - else - rpcrdma_recv_buffer_put(rep); + rpcrdma_reply_handler(rep); spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); } @@ -236,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) __func__, rep, wc->byte_len); rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, + ib_dma_sync_single_for_cpu(rep->rr_device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); prefetch(rdmab_to_msg(rep->rr_rdmabuf)); @@ -407,7 +401,7 @@ connected: pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", sap, rpc_get_port(sap), - ia->ri_id->device->name, + ia->ri_device->name, ia->ri_ops->ro_displayname, xprt->rx_buf.rb_max_requests, ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); @@ -508,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rc = PTR_ERR(ia->ri_id); goto out1; } + ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_id->device); + ia->ri_pd = ib_alloc_pd(ia->ri_device); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); dprintk("RPC: %s: ib_alloc_pd() failed %i\n", @@ -517,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - rc = ib_query_device(ia->ri_id->device, devattr); + rc = ib_query_device(ia->ri_device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); @@ -526,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; - ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { @@ -541,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } } if (memreg == RPCRDMA_MTHCAFMR) { - if (!ia->ri_id->device->alloc_fmr) { + if (!ia->ri_device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); memreg = RPCRDMA_ALLPHYSICAL; @@ -590,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) dprintk("RPC: %s: memory registration strategy is '%s'\n", __func__, ia->ri_ops->ro_displayname); - /* Else will do memory reg/dereg for each chunk */ - ia->ri_memreg_strategy = memreg; - rwlock_init(&ia->ri_qplock); return 0; @@ -622,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) dprintk("RPC: %s: ib_dereg_mr returned %i\n", __func__, rc); } + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id->qp) rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; } - if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { - rc = ib_dealloc_pd(ia->ri_pd); - dprintk("RPC: %s: ib_dealloc_pd returned %i\n", - __func__, rc); - } + + /* If the pd is still busy, xprtrdma missed freeing a resource */ + if (ia->ri_pd && !IS_ERR(ia->ri_pd)) + WARN_ON(ib_dealloc_pd(ia->ri_pd)); } /* @@ -693,8 +685,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; - sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -710,8 +702,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; - recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -817,8 +809,6 @@ retry: rpcrdma_flush_cqs(ep); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - ia->ri_ops->ro_reset(xprt); - id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr); if (IS_ERR(id)) { @@ -832,7 +822,7 @@ retry: * More stuff I haven't thought of! * Rrrgh! */ - if (ia->ri_id->device != id->device) { + if (ia->ri_device != id->device) { printk("RPC: %s: can't reconnect on " "different device!\n", __func__); rdma_destroy_id(id); @@ -974,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) goto out_free; } - rep->rr_buffer = &r_xprt->rx_buf; + rep->rr_device = ia->ri_device; + rep->rr_rxprt = r_xprt; return rep; out_free: @@ -1098,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } -/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving - * some req segments uninitialized. - */ -static void -rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf) +struct rpcrdma_mw * +rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) { - if (*mw) { - list_add_tail(&(*mw)->mw_list, &buf->rb_mws); - *mw = NULL; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_mw *mw = NULL; + + spin_lock(&buf->rb_mwlock); + if (!list_empty(&buf->rb_mws)) { + mw = list_first_entry(&buf->rb_mws, + struct rpcrdma_mw, mw_list); + list_del_init(&mw->mw_list); } + spin_unlock(&buf->rb_mwlock); + + if (!mw) + pr_err("RPC: %s: no MWs available\n", __func__); + return mw; } -/* Cycle mw's back in reverse order, and "spin" them. - * This delays and scrambles reuse as much as possible. - */ -static void -rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +void +rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) { - struct rpcrdma_mr_seg *seg = req->rl_segments; - struct rpcrdma_mr_seg *seg1 = seg; - int i; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); + spin_lock(&buf->rb_mwlock); + list_add_tail(&mw->mw_list, &buf->rb_mws); + spin_unlock(&buf->rb_mwlock); } static void @@ -1132,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) req->rl_niovs = 0; if (req->rl_reply) { buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; - req->rl_reply->rr_func = NULL; req->rl_reply = NULL; } } -/* rpcrdma_unmap_one() was already done during deregistration. - * Redo only the ib_post_send(). - */ -static void -rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) -{ - struct rpcrdma_xprt *r_xprt = - container_of(ia, struct rpcrdma_xprt, rx_ia); - struct ib_send_wr invalidate_wr, *bad_wr; - int rc; - - dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); - - /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ - r->r.frmr.fr_state = FRMR_IS_INVALID; - - memset(&invalidate_wr, 0, sizeof(invalidate_wr)); - invalidate_wr.wr_id = (unsigned long)(void *)r; - invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; - DECR_CQCOUNT(&r_xprt->rx_ep); - - dprintk("RPC: %s: frmr %p invalidating rkey %08x\n", - __func__, r, r->r.frmr.fr_mr->rkey); - - read_lock(&ia->ri_qplock); - rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); - read_unlock(&ia->ri_qplock); - if (rc) { - /* Force rpcrdma_buffer_get() to retry */ - r->r.frmr.fr_state = FRMR_IS_STALE; - dprintk("RPC: %s: ib_post_send failed, %i\n", - __func__, rc); - } -} - -static void -rpcrdma_retry_flushed_linv(struct list_head *stale, - struct rpcrdma_buffer *buf) -{ - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - struct list_head *pos; - struct rpcrdma_mw *r; - unsigned long flags; - - list_for_each(pos, stale) { - r = list_entry(pos, struct rpcrdma_mw, mw_list); - rpcrdma_retry_local_inv(r, ia); - } - - spin_lock_irqsave(&buf->rb_lock, flags); - list_splice_tail(stale, &buf->rb_mws); - spin_unlock_irqrestore(&buf->rb_lock, flags); -} - -static struct rpcrdma_req * -rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, - struct list_head *stale) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - if (r->r.frmr.fr_state == FRMR_IS_STALE) { - list_add(&r->mw_list, stale); - continue; - } - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - -static struct rpcrdma_req * -rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - struct rpcrdma_mw *r; - int i; - - i = RPCRDMA_MAX_SEGS - 1; - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - req->rl_segments[i].rl_mw = r; - if (unlikely(i-- == 0)) - return req; /* Success */ - } - - /* Not enough entries on rb_mws for this req */ - rpcrdma_buffer_put_sendbuf(req, buf); - rpcrdma_buffer_put_mrs(req, buf); - return NULL; -} - /* * Get a set of request/reply buffers. * @@ -1253,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); - struct list_head stale; struct rpcrdma_req *req; unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); + if (buffers->rb_send_index == buffers->rb_max_requests) { spin_unlock_irqrestore(&buffers->rb_lock, flags); dprintk("RPC: %s: out of request buffers\n", __func__); @@ -1277,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; - INIT_LIST_HEAD(&stale); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); - break; - case RPCRDMA_MTHCAFMR: - req = rpcrdma_buffer_get_fmrs(req, buffers); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); - if (!list_empty(&stale)) - rpcrdma_retry_flushed_linv(&stale, buffers); return req; } @@ -1302,19 +1176,10 @@ void rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); rpcrdma_buffer_put_sendbuf(req, buffers); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - case RPCRDMA_MTHCAFMR: - rpcrdma_buffer_put_mrs(req, buffers); - break; - default: - break; - } spin_unlock_irqrestore(&buffers->rb_lock, flags); } @@ -1344,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { - struct rpcrdma_buffer *buffers = rep->rr_buffer; + struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; unsigned long flags; - rep->rr_func = NULL; spin_lock_irqsave(&buffers->rb_lock, flags); buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; spin_unlock_irqrestore(&buffers->rb_lock, flags); @@ -1376,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, /* * All memory passed here was kmalloc'ed, therefore phys-contiguous. */ - iov->addr = ib_dma_map_single(ia->ri_id->device, + iov->addr = ib_dma_map_single(ia->ri_device, va, len, DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) + if (ib_dma_mapping_error(ia->ri_device, iov->addr)) return -ENOMEM; iov->length = len; @@ -1422,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, { int rc; - ib_dma_unmap_single(ia->ri_id->device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); + ib_dma_unmap_single(ia->ri_device, + iov->addr, iov->length, DMA_BIDIRECTIONAL); if (NULL == mr) return 0; @@ -1516,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, send_wr.num_sge = req->rl_niovs; send_wr.opcode = IB_WR_SEND; if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[3].addr, req->rl_send_iov[3].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[1].addr, req->rl_send_iov[1].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[0].addr, req->rl_send_iov[0].length, - DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[3].addr, + req->rl_send_iov[3].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[1].addr, + req->rl_send_iov[1].length, + DMA_TO_DEVICE); + ib_dma_sync_single_for_device(ia->ri_device, + req->rl_send_iov[0].addr, + req->rl_send_iov[0].length, + DMA_TO_DEVICE); if (DECR_CQCOUNT(ep) > 0) send_wr.send_flags = 0; @@ -1557,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; - ib_dma_sync_single_for_cpu(ia->ri_id->device, + ib_dma_sync_single_for_cpu(ia->ri_device, rdmab_addr(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf), DMA_BIDIRECTIONAL); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 58163b88738c..f49dd8b38122 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -62,6 +62,7 @@ struct rpcrdma_ia { const struct rpcrdma_memreg_ops *ri_ops; rwlock_t ri_qplock; + struct ib_device *ri_device; struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; @@ -69,7 +70,6 @@ struct rpcrdma_ia { int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; - enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; struct ib_device_attr ri_devattr; struct ib_qp_attr ri_qp_attr; @@ -173,9 +173,8 @@ struct rpcrdma_buffer; struct rpcrdma_rep { unsigned int rr_len; - struct rpcrdma_buffer *rr_buffer; - struct rpc_xprt *rr_xprt; - void (*rr_func)(struct rpcrdma_rep *); + struct ib_device *rr_device; + struct rpcrdma_xprt *rr_rxprt; struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; }; @@ -203,11 +202,18 @@ struct rpcrdma_frmr { struct ib_fast_reg_page_list *fr_pgl; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; + struct work_struct fr_work; + struct rpcrdma_xprt *fr_xprt; +}; + +struct rpcrdma_fmr { + struct ib_fmr *fmr; + u64 *physaddrs; }; struct rpcrdma_mw { union { - struct ib_fmr *fmr; + struct rpcrdma_fmr fmr; struct rpcrdma_frmr frmr; } r; void (*mw_sendcompletion)(struct ib_wc *); @@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) * One of these is associated with a transport instance */ struct rpcrdma_buffer { - spinlock_t rb_lock; /* protects indexes */ - u32 rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ - struct list_head rb_all; - int rb_send_index; + spinlock_t rb_mwlock; /* protect rb_mws list */ + struct list_head rb_mws; + struct list_head rb_all; + char *rb_pool; + + spinlock_t rb_lock; /* protect buf arrays */ + u32 rb_max_requests; + int rb_send_index; + int rb_recv_index; struct rpcrdma_req **rb_send_bufs; - int rb_recv_index; struct rpcrdma_rep **rb_recv_bufs; - char *rb_pool; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops { struct rpcrdma_create_data_internal *); size_t (*ro_maxpages)(struct rpcrdma_xprt *); int (*ro_init)(struct rpcrdma_xprt *); - void (*ro_reset)(struct rpcrdma_xprt *); void (*ro_destroy)(struct rpcrdma_buffer *); const char *ro_displayname; }; @@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); +struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); +void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); @@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); +int frwr_alloc_recovery_wq(void); +void frwr_destroy_recovery_wq(void); + /* * Wrappers for chunk registration, shared by read/write chunk code. */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b0517287075b..e193c2b5476b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -623,24 +623,6 @@ process_status: } /** - * xs_tcp_shutdown - gracefully shut down a TCP socket - * @xprt: transport - * - * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_RDWR); - */ -static void xs_tcp_shutdown(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; - - if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - trace_rpc_socket_shutdown(xprt, sock); - } -} - -/** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request * @@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); + xprt_force_disconnect(xprt); } /** @@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + if (atomic_read(&transport->xprt.swapper)) + sk_clear_memalloc(sk); + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_inject_disconnect(struct rpc_xprt *xprt) +{ + dprintk("RPC: injecting transport disconnect on xprt=%p\n", + xprt); + xprt_disconnect_done(xprt); +} + static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) /** * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets * @sk: socket with data to read - * @len: how much data to read * * Currently this assumes we can read the whole reply in a single gulp. */ @@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk) /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read - * @len: how much data to read * */ static void xs_udp_data_ready(struct sock *sk) @@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read - * @bytes: how much data to read * */ static void xs_tcp_data_ready(struct sock *sk) @@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, /** * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint - * @xprt: RPC transport to connect * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type */ static int xs_local_setup_socket(struct sock_xprt *transport) { @@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) msleep_interruptible(15000); } -#ifdef CONFIG_SUNRPC_SWAP +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +/* + * Note that this should be called with XPRT_LOCKED held (or when we otherwise + * know that we have exclusive access to the socket), to guard against + * races with xs_reset_transport. + */ static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt->swapper) + /* + * If there's no sock, then we have nothing to set. The + * reconnecting process will get it for us. + */ + if (!transport->inet) + return; + if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } /** - * xs_swapper - Tag this transport as being used for swap. + * xs_enable_swap - Tag this transport as being used for swap. * @xprt: transport to tag - * @enable: enable/disable * + * Take a reference to this transport on behalf of the rpc_clnt, and + * optionally mark it for swapping if it wasn't already. */ -int xs_swapper(struct rpc_xprt *xprt, int enable) +static int +xs_enable_swap(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); - int err = 0; + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (enable) { - xprt->swapper++; - xs_set_memalloc(xprt); - } else if (xprt->swapper) { - xprt->swapper--; - sk_clear_memalloc(transport->inet); - } + if (atomic_inc_return(&xprt->swapper) != 1) + return 0; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return -ERESTARTSYS; + if (xs->inet) + sk_set_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); + return 0; +} - return err; +/** + * xs_disable_swap - Untag this transport as being used for swap. + * @xprt: transport to tag + * + * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the + * swapper refcount goes to 0, untag the socket as a memalloc socket. + */ +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); + + if (!atomic_dec_and_test(&xprt->swapper)) + return; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return; + if (xs->inet) + sk_clear_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); } -EXPORT_SYMBOL_GPL(xs_swapper); #else static void xs_set_memalloc(struct rpc_xprt *xprt) { } + +static int +xs_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ +} #endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2057,6 +2086,27 @@ out: xprt_wake_pending_tasks(xprt, status); } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_RDWR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock == NULL) + return; + if (xprt_connected(xprt)) { + kernel_sock_shutdown(sock, SHUT_RDWR); + trace_rpc_socket_shutdown(xprt, sock); + } else + xs_reset_transport(transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) unsigned int keepidle = xprt->timeout->to_initval / HZ; unsigned int keepcnt = xprt->timeout->to_retries + 1; unsigned int opt_on = 1; + unsigned int timeo; /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); + /* TCP user timeout (see RFC5482) */ + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); @@ -2125,9 +2182,6 @@ out: /** * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint - * @xprt: RPC transport to connect - * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ @@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_udp_ops = { @@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_udp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; /* @@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = { .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index f52abae0ec5f..aceaaed09811 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -86,7 +86,7 @@ $(simple-targets): $(obj)/conf PHONY += oldnoconfig savedefconfig defconfig # oldnoconfig is an alias of olddefconfig, because people already are dependent -# on its behavior(sets new symbols to their default value but not 'n') with the +# on its behavior (sets new symbols to their default value but not 'n') with the # counter-intuitive name. oldnoconfig: olddefconfig @@ -126,10 +126,11 @@ tinyconfig: # Help text used by make help help: @echo ' config - Update current config utilising a line-oriented program' - @echo ' nconfig - Update current config utilising a ncurses menu based program' + @echo ' nconfig - Update current config utilising a ncurses menu based' + @echo ' program' @echo ' menuconfig - Update current config utilising a menu based program' - @echo ' xconfig - Update current config utilising a QT based front-end' - @echo ' gconfig - Update current config utilising a GTK based front-end' + @echo ' xconfig - Update current config utilising a Qt based front-end' + @echo ' gconfig - Update current config utilising a GTK+ based front-end' @echo ' oldconfig - Update current config utilising a provided .config as base' @echo ' localmodconfig - Update current config disabling modules not loaded' @echo ' localyesconfig - Update current config converting local mods to core' @@ -142,7 +143,8 @@ help: @echo ' alldefconfig - New config with all symbols set to default' @echo ' randconfig - New config with random answer to all options' @echo ' listnewconfig - List new options' - @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value' + @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their' + @echo ' default value' @echo ' kvmconfig - Enable additional options for kvm guest kernel support' @echo ' xenconfig - Enable additional options for xen dom0 and guest kernel support' @echo ' tinyconfig - Configure the tiniest possible kernel' @@ -163,9 +165,9 @@ HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \ # mconf: Used for the menuconfig target # Utilizes the lxdialog package # qconf: Used for the xconfig target -# Based on QT which needs to be installed to compile it +# Based on Qt which needs to be installed to compile it # gconf: Used for the gconfig target -# Based on GTK which needs to be installed to compile it +# Based on GTK+ which needs to be installed to compile it # object files used by all kconfig flavours lxdialog := lxdialog/checklist.o lxdialog/util.o lxdialog/inputbox.o @@ -222,11 +224,11 @@ ifeq ($(MAKECMDGOALS),xconfig) $(obj)/.tmp_qtcheck: $(src)/Makefile -include $(obj)/.tmp_qtcheck -# QT needs some extra effort... +# Qt needs some extra effort... $(obj)/.tmp_qtcheck: @set -e; $(kecho) " CHECK qt"; dir=""; pkg=""; \ if ! pkg-config --exists QtCore 2> /dev/null; then \ - echo "* Unable to find the QT4 tool qmake. Trying to use QT3"; \ + echo "* Unable to find the Qt4 tool qmake. Trying to use Qt3"; \ pkg-config --exists qt 2> /dev/null && pkg=qt; \ pkg-config --exists qt-mt 2> /dev/null && pkg=qt-mt; \ if [ -n "$$pkg" ]; then \ @@ -240,8 +242,8 @@ $(obj)/.tmp_qtcheck: done; \ if [ -z "$$dir" ]; then \ echo >&2 "*"; \ - echo >&2 "* Unable to find any QT installation. Please make sure that"; \ - echo >&2 "* the QT4 or QT3 development package is correctly installed and"; \ + echo >&2 "* Unable to find any Qt installation. Please make sure that"; \ + echo >&2 "* the Qt4 or Qt3 development package is correctly installed and"; \ echo >&2 "* either qmake can be found or install pkg-config or set"; \ echo >&2 "* the QTDIR environment variable to the correct location."; \ echo >&2 "*"; \ @@ -278,7 +280,7 @@ $(obj)/gconf.o: $(obj)/.tmp_gtkcheck ifeq ($(MAKECMDGOALS),gconfig) -include $(obj)/.tmp_gtkcheck -# GTK needs some extra effort, too... +# GTK+ needs some extra effort, too... $(obj)/.tmp_gtkcheck: @if `pkg-config --exists gtk+-2.0 gmodule-2.0 libglade-2.0`; then \ if `pkg-config --atleast-version=2.0.0 gtk+-2.0`; then \ @@ -309,7 +311,7 @@ quiet_cmd_moc = MOC $@ $(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck $(call cmd,moc) -# Extract gconf menu items for I18N support +# Extract gconf menu items for i18n support $(obj)/gconf.glade.h: $(obj)/gconf.glade $(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \ $(obj)/gconf.glade diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index fb0a2a286dca..667d1aa23711 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -13,9 +13,6 @@ static int expr_eq(struct expr *e1, struct expr *e2); static struct expr *expr_eliminate_yn(struct expr *e); -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2); -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2); -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2); struct expr *expr_alloc_symbol(struct symbol *sym) { @@ -82,6 +79,10 @@ struct expr *expr_copy(const struct expr *org) e->left.expr = expr_copy(org->left.expr); break; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: e->left.sym = org->left.sym; e->right.sym = org->right.sym; @@ -114,6 +115,10 @@ void expr_free(struct expr *e) expr_free(e->left.expr); return; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: break; case E_OR: @@ -200,6 +205,10 @@ static int expr_eq(struct expr *e1, struct expr *e2) return 0; switch (e1->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return e1->left.sym == e2->left.sym && e1->right.sym == e2->right.sym; case E_SYMBOL: @@ -559,62 +568,6 @@ static void expr_eliminate_dups1(enum expr_type type, struct expr **ep1, struct #undef e2 } -static void expr_eliminate_dups2(enum expr_type type, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - struct expr *tmp, *tmp1, *tmp2; - - if (e1->type == type) { - expr_eliminate_dups2(type, &e1->left.expr, &e2); - expr_eliminate_dups2(type, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_eliminate_dups2(type, &e1, &e2->left.expr); - expr_eliminate_dups2(type, &e1, &e2->right.expr); - } - if (e1 == e2) - return; - - switch (e1->type) { - case E_OR: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO || BAR) && (!FOO && !BAR) -> n - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_and(&tmp1, &tmp2); - if (expr_is_yes(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_no); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - case E_AND: - expr_eliminate_dups2(e1->type, &e1, &e1); - // (FOO && BAR) || (!FOO || !BAR) -> y - tmp1 = expr_transform(expr_alloc_one(E_NOT, expr_copy(e1))); - tmp2 = expr_copy(e2); - tmp = expr_extract_eq_or(&tmp1, &tmp2); - if (expr_is_no(tmp1)) { - expr_free(e1); - e1 = expr_alloc_symbol(&symbol_yes); - trans_count++; - } - expr_free(tmp2); - expr_free(tmp1); - expr_free(tmp); - break; - default: - ; - } -#undef e1 -#undef e2 -} - struct expr *expr_eliminate_dups(struct expr *e) { int oldcount; @@ -627,7 +580,6 @@ struct expr *expr_eliminate_dups(struct expr *e) switch (e->type) { case E_OR: case E_AND: expr_eliminate_dups1(e->type, &e, &e); - expr_eliminate_dups2(e->type, &e, &e); default: ; } @@ -647,6 +599,10 @@ struct expr *expr_transform(struct expr *e) return NULL; switch (e->type) { case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: case E_SYMBOL: case E_LIST: @@ -719,6 +675,22 @@ struct expr *expr_transform(struct expr *e) e = tmp; e->type = e->type == E_EQUAL ? E_UNEQUAL : E_EQUAL; break; + case E_LEQ: + case E_GEQ: + // !a<='x' -> a>'x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LEQ ? E_GTH : E_LTH; + break; + case E_LTH: + case E_GTH: + // !a<'x' -> a>='x' + tmp = e->left.expr; + free(e); + e = tmp; + e->type = e->type == E_LTH ? E_GEQ : E_LEQ; + break; case E_OR: // !(a || b) -> !a && !b tmp = e->left.expr; @@ -789,6 +761,10 @@ int expr_contains_symbol(struct expr *dep, struct symbol *sym) case E_SYMBOL: return dep->left.sym == sym; case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: return dep->left.sym == sym || dep->right.sym == sym; @@ -829,57 +805,6 @@ bool expr_depends_symbol(struct expr *dep, struct symbol *sym) return false; } -static struct expr *expr_extract_eq_and(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_AND, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static struct expr *expr_extract_eq_or(struct expr **ep1, struct expr **ep2) -{ - struct expr *tmp = NULL; - expr_extract_eq(E_OR, &tmp, ep1, ep2); - if (tmp) { - *ep1 = expr_eliminate_yn(*ep1); - *ep2 = expr_eliminate_yn(*ep2); - } - return tmp; -} - -static void expr_extract_eq(enum expr_type type, struct expr **ep, struct expr **ep1, struct expr **ep2) -{ -#define e1 (*ep1) -#define e2 (*ep2) - if (e1->type == type) { - expr_extract_eq(type, ep, &e1->left.expr, &e2); - expr_extract_eq(type, ep, &e1->right.expr, &e2); - return; - } - if (e2->type == type) { - expr_extract_eq(type, ep, ep1, &e2->left.expr); - expr_extract_eq(type, ep, ep1, &e2->right.expr); - return; - } - if (expr_eq(e1, e2)) { - *ep = *ep ? expr_alloc_two(type, *ep, e1) : e1; - expr_free(e2); - if (type == E_AND) { - e1 = expr_alloc_symbol(&symbol_yes); - e2 = expr_alloc_symbol(&symbol_yes); - } else if (type == E_OR) { - e1 = expr_alloc_symbol(&symbol_no); - e2 = expr_alloc_symbol(&symbol_no); - } - } -#undef e1 -#undef e2 -} - struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symbol *sym) { struct expr *e1, *e2; @@ -914,6 +839,10 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb case E_NOT: return expr_trans_compare(e->left.expr, type == E_EQUAL ? E_UNEQUAL : E_EQUAL, sym); case E_UNEQUAL: + case E_LTH: + case E_LEQ: + case E_GTH: + case E_GEQ: case E_EQUAL: if (type == E_EQUAL) { if (sym == &symbol_yes) @@ -941,10 +870,57 @@ struct expr *expr_trans_compare(struct expr *e, enum expr_type type, struct symb return NULL; } +enum string_value_kind { + k_string, + k_signed, + k_unsigned, + k_invalid +}; + +union string_value { + unsigned long long u; + signed long long s; +}; + +static enum string_value_kind expr_parse_string(const char *str, + enum symbol_type type, + union string_value *val) +{ + char *tail; + enum string_value_kind kind; + + errno = 0; + switch (type) { + case S_BOOLEAN: + case S_TRISTATE: + return k_string; + case S_INT: + val->s = strtoll(str, &tail, 10); + kind = k_signed; + break; + case S_HEX: + val->u = strtoull(str, &tail, 16); + kind = k_unsigned; + break; + case S_STRING: + case S_UNKNOWN: + val->s = strtoll(str, &tail, 0); + kind = k_signed; + break; + default: + return k_invalid; + } + return !errno && !*tail && tail > str && isxdigit(tail[-1]) + ? kind : k_string; +} + tristate expr_calc_value(struct expr *e) { tristate val1, val2; const char *str1, *str2; + enum string_value_kind k1 = k_string, k2 = k_string; + union string_value lval = {}, rval = {}; + int res; if (!e) return yes; @@ -965,21 +941,57 @@ tristate expr_calc_value(struct expr *e) val1 = expr_calc_value(e->left.expr); return EXPR_NOT(val1); case E_EQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? yes : no; + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: - sym_calc_value(e->left.sym); - sym_calc_value(e->right.sym); - str1 = sym_get_string_value(e->left.sym); - str2 = sym_get_string_value(e->right.sym); - return !strcmp(str1, str2) ? no : yes; + break; default: printf("expr_calc_value: %d?\n", e->type); return no; } + + sym_calc_value(e->left.sym); + sym_calc_value(e->right.sym); + str1 = sym_get_string_value(e->left.sym); + str2 = sym_get_string_value(e->right.sym); + + if (e->left.sym->type != S_STRING || e->right.sym->type != S_STRING) { + k1 = expr_parse_string(str1, e->left.sym->type, &lval); + k2 = expr_parse_string(str2, e->right.sym->type, &rval); + } + + if (k1 == k_string || k2 == k_string) + res = strcmp(str1, str2); + else if (k1 == k_invalid || k2 == k_invalid) { + if (e->type != E_EQUAL && e->type != E_UNEQUAL) { + printf("Cannot compare \"%s\" and \"%s\"\n", str1, str2); + return no; + } + res = strcmp(str1, str2); + } else if (k1 == k_unsigned || k2 == k_unsigned) + res = (lval.u > rval.u) - (lval.u < rval.u); + else /* if (k1 == k_signed && k2 == k_signed) */ + res = (lval.s > rval.s) - (lval.s < rval.s); + + switch(e->type) { + case E_EQUAL: + return res ? no : yes; + case E_GEQ: + return res >= 0 ? yes : no; + case E_GTH: + return res > 0 ? yes : no; + case E_LEQ: + return res <= 0 ? yes : no; + case E_LTH: + return res < 0 ? yes : no; + case E_UNEQUAL: + return res ? yes : no; + default: + printf("expr_calc_value: relation %d?\n", e->type); + return no; + } } static int expr_compare_type(enum expr_type t1, enum expr_type t2) @@ -987,6 +999,12 @@ static int expr_compare_type(enum expr_type t1, enum expr_type t2) if (t1 == t2) return 0; switch (t1) { + case E_LEQ: + case E_LTH: + case E_GEQ: + case E_GTH: + if (t2 == E_EQUAL || t2 == E_UNEQUAL) + return 1; case E_EQUAL: case E_UNEQUAL: if (t2 == E_NOT) @@ -1080,6 +1098,24 @@ void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char * fn(data, NULL, "="); fn(data, e->right.sym, e->right.sym->name); break; + case E_LEQ: + case E_LTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, "<choice>"); + fn(data, NULL, e->type == E_LEQ ? "<=" : "<"); + fn(data, e->right.sym, e->right.sym->name); + break; + case E_GEQ: + case E_GTH: + if (e->left.sym->name) + fn(data, e->left.sym, e->left.sym->name); + else + fn(data, NULL, "<choice>"); + fn(data, NULL, e->type == E_LEQ ? ">=" : ">"); + fn(data, e->right.sym, e->right.sym->name); + break; case E_UNEQUAL: if (e->left.sym->name) fn(data, e->left.sym, e->left.sym->name); diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index a2fc96a2bd2c..973b6f733368 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -29,7 +29,9 @@ typedef enum tristate { } tristate; enum expr_type { - E_NONE, E_OR, E_AND, E_NOT, E_EQUAL, E_UNEQUAL, E_LIST, E_SYMBOL, E_RANGE + E_NONE, E_OR, E_AND, E_NOT, + E_EQUAL, E_UNEQUAL, E_LTH, E_LEQ, E_GTH, E_GEQ, + E_LIST, E_SYMBOL, E_RANGE }; union expr_data { diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 6731377f9bb2..70c5ee189dce 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -1166,6 +1166,10 @@ static struct symbol *sym_check_expr_deps(struct expr *e) case E_NOT: return sym_check_expr_deps(e->left.expr); case E_EQUAL: + case E_GEQ: + case E_GTH: + case E_LEQ: + case E_LTH: case E_UNEQUAL: sym = sym_check_deps(e->left.sym); if (sym) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 6c62d93b4ffb..200a3fe30091 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -122,6 +122,10 @@ n [A-Za-z0-9_] "!" return T_NOT; "=" return T_EQUAL; "!=" return T_UNEQUAL; + "<=" return T_LESS_EQUAL; + ">=" return T_GREATER_EQUAL; + "<" return T_LESS; + ">" return T_GREATER; \"|\' { str = yytext[0]; new_string(); @@ -141,7 +145,12 @@ n [A-Za-z0-9_] } #.* /* comment */ \\\n current_file->lineno++; - . + [[:blank:]]+ + . { + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *yytext); + } <<EOF>> { BEGIN(INITIAL); } diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index 349a7f24315b..dd4e86c82521 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -365,323 +365,354 @@ int zconflineno = 1; extern char *zconftext; #define yytext_ptr zconftext -static yyconst flex_int16_t yy_nxt[][17] = +static yyconst flex_int16_t yy_nxt[][19] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 12, 13, 14, 12, 12, 15, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12 + 12, 12, 12, 12, 12, 12, 12, 12, 12 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 16, 16, 17, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 18, 16, 16, 16 + 16, 16, 16, 18, 16, 16, 16, 16, 16 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 19, 20, 21, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19 + 19, 19, 19, 19, 19, 19, 19, 19, 19 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { 11, 22, 22, 23, 22, 24, 22, 22, 24, 22, - 22, 22, 22, 22, 22, 25, 22 + 22, 22, 22, 22, 22, 22, 22, 25, 22 }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { - 11, 26, 26, 27, 28, 29, 30, 31, 29, 32, - 33, 34, 35, 35, 36, 37, 38 + 11, 26, 27, 28, 29, 30, 31, 32, 30, 33, + 34, 35, 36, 36, 37, 38, 39, 40, 41 }, { -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, - -11, -11, -11, -11, -11, -11, -11 + -11, -11, -11, -11, -11, -11, -11, -11, -11 }, { 11, -12, -12, -12, -12, -12, -12, -12, -12, -12, - -12, -12, -12, -12, -12, -12, -12 + -12, -12, -12, -12, -12, -12, -12, -12, -12 }, { - 11, -13, 39, 40, -13, -13, 41, -13, -13, -13, - -13, -13, -13, -13, -13, -13, -13 + 11, -13, 42, 43, -13, -13, 44, -13, -13, -13, + -13, -13, -13, -13, -13, -13, -13, -13, -13 }, { 11, -14, -14, -14, -14, -14, -14, -14, -14, -14, - -14, -14, -14, -14, -14, -14, -14 + -14, -14, -14, -14, -14, -14, -14, -14, -14 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { 11, -16, -16, -16, -16, -16, -16, -16, -16, -16, - -16, -16, -16, -16, -16, -16, -16 + -16, -16, -16, -16, -16, -16, -16, -16, -16 }, { 11, -17, -17, -17, -17, -17, -17, -17, -17, -17, - -17, -17, -17, -17, -17, -17, -17 + -17, -17, -17, -17, -17, -17, -17, -17, -17 }, { 11, -18, -18, -18, -18, -18, -18, -18, -18, -18, - -18, -18, -18, 44, -18, -18, -18 + -18, -18, -18, 47, -18, -18, -18, -18, -18 }, { - 11, 45, 45, -19, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, 48, 48, -19, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, -20, 46, 47, -20, -20, -20, -20, -20, -20, - -20, -20, -20, -20, -20, -20, -20 + 11, -20, 49, 50, -20, -20, -20, -20, -20, -20, + -20, -20, -20, -20, -20, -20, -20, -20, -20 }, { - 11, 48, -21, -21, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, 51, -21, -21, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { - 11, 49, 49, 50, 49, -22, 49, 49, -22, 49, - 49, 49, 49, 49, 49, -22, 49 + 11, 52, 52, 53, 52, -22, 52, 52, -22, 52, + 52, 52, 52, 52, 52, 52, 52, -22, 52 }, { 11, -23, -23, -23, -23, -23, -23, -23, -23, -23, - -23, -23, -23, -23, -23, -23, -23 + -23, -23, -23, -23, -23, -23, -23, -23, -23 }, { 11, -24, -24, -24, -24, -24, -24, -24, -24, -24, - -24, -24, -24, -24, -24, -24, -24 + -24, -24, -24, -24, -24, -24, -24, -24, -24 }, { - 11, 51, 51, 52, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 51, 51, 51 + 11, 54, 54, 55, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54 }, { 11, -26, -26, -26, -26, -26, -26, -26, -26, -26, - -26, -26, -26, -26, -26, -26, -26 + -26, -26, -26, -26, -26, -26, -26, -26, -26 }, { - 11, -27, -27, -27, -27, -27, -27, -27, -27, -27, - -27, -27, -27, -27, -27, -27, -27 + 11, -27, 56, -27, -27, -27, -27, -27, -27, -27, + -27, -27, -27, -27, -27, -27, -27, -27, -27 }, { 11, -28, -28, -28, -28, -28, -28, -28, -28, -28, - -28, -28, -28, -28, 53, -28, -28 + -28, -28, -28, -28, -28, -28, -28, -28, -28 }, { 11, -29, -29, -29, -29, -29, -29, -29, -29, -29, - -29, -29, -29, -29, -29, -29, -29 + -29, -29, -29, -29, -29, 57, -29, -29, -29 }, { - 11, 54, 54, -30, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -30, -30, -30, -30, -30, -30, -30, -30, -30, + -30, -30, -30, -30, -30, -30, -30, -30, -30 }, { - 11, -31, -31, -31, -31, -31, -31, 55, -31, -31, - -31, -31, -31, -31, -31, -31, -31 + 11, 58, 58, -31, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { - 11, -32, -32, -32, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32 + 11, -32, -32, -32, -32, -32, -32, 59, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32 }, { 11, -33, -33, -33, -33, -33, -33, -33, -33, -33, - -33, -33, -33, -33, -33, -33, -33 + -33, -33, -33, -33, -33, -33, -33, -33, -33 }, { 11, -34, -34, -34, -34, -34, -34, -34, -34, -34, - -34, 56, 57, 57, -34, -34, -34 + -34, -34, -34, -34, -34, -34, -34, -34, -34 }, { 11, -35, -35, -35, -35, -35, -35, -35, -35, -35, - -35, 57, 57, 57, -35, -35, -35 + -35, 60, 61, 61, -35, -35, -35, -35, -35 }, { 11, -36, -36, -36, -36, -36, -36, -36, -36, -36, - -36, -36, -36, -36, -36, -36, -36 + -36, 61, 61, 61, -36, -36, -36, -36, -36 }, { - 11, -37, -37, 58, -37, -37, -37, -37, -37, -37, - -37, -37, -37, -37, -37, -37, -37 + 11, -37, -37, -37, -37, -37, -37, -37, -37, -37, + -37, -37, -37, -37, -37, 62, -37, -37, -37 }, { 11, -38, -38, -38, -38, -38, -38, -38, -38, -38, - -38, -38, -38, -38, -38, -38, 59 + -38, -38, -38, -38, -38, -38, -38, -38, -38 }, { - 11, -39, 39, 40, -39, -39, 41, -39, -39, -39, - -39, -39, -39, -39, -39, -39, -39 + 11, -39, -39, -39, -39, -39, -39, -39, -39, -39, + -39, -39, -39, -39, -39, 63, -39, -39, -39 }, { - 11, -40, -40, -40, -40, -40, -40, -40, -40, -40, - -40, -40, -40, -40, -40, -40, -40 + 11, -40, -40, 64, -40, -40, -40, -40, -40, -40, + -40, -40, -40, -40, -40, -40, -40, -40, -40 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, -41, -41, -41, -41, -41, -41, -41, -41, -41, + -41, -41, -41, -41, -41, -41, -41, -41, 65 }, { - 11, 42, 42, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42 + 11, -42, 42, 43, -42, -42, 44, -42, -42, -42, + -42, -42, -42, -42, -42, -42, -42, -42, -42 }, { 11, -43, -43, -43, -43, -43, -43, -43, -43, -43, - -43, -43, -43, -43, -43, -43, -43 + -43, -43, -43, -43, -43, -43, -43, -43, -43 }, { - 11, -44, -44, -44, -44, -44, -44, -44, -44, -44, - -44, -44, -44, 44, -44, -44, -44 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, 45, 45, -45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45 + 11, 45, 45, 46, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45 }, { - 11, -46, 46, 47, -46, -46, -46, -46, -46, -46, - -46, -46, -46, -46, -46, -46, -46 + 11, -46, -46, -46, -46, -46, -46, -46, -46, -46, + -46, -46, -46, -46, -46, -46, -46, -46, -46 }, { - 11, 48, -47, -47, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48 + 11, -47, -47, -47, -47, -47, -47, -47, -47, -47, + -47, -47, -47, 47, -47, -47, -47, -47, -47 }, { - 11, -48, -48, -48, -48, -48, -48, -48, -48, -48, - -48, -48, -48, -48, -48, -48, -48 + 11, 48, 48, -48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48 }, { - 11, 49, 49, 50, 49, -49, 49, 49, -49, 49, - 49, 49, 49, 49, 49, -49, 49 + 11, -49, 49, 50, -49, -49, -49, -49, -49, -49, + -49, -49, -49, -49, -49, -49, -49, -49, -49 }, { - 11, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50 + 11, 51, -50, -50, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 51, 51, 51 }, { - 11, -51, -51, 52, -51, -51, -51, -51, -51, -51, - -51, -51, -51, -51, -51, -51, -51 + 11, -51, -51, -51, -51, -51, -51, -51, -51, -51, + -51, -51, -51, -51, -51, -51, -51, -51, -51 }, { - 11, -52, -52, -52, -52, -52, -52, -52, -52, -52, - -52, -52, -52, -52, -52, -52, -52 + 11, 52, 52, 53, 52, -52, 52, 52, -52, 52, + 52, 52, 52, 52, 52, 52, 52, -52, 52 }, { 11, -53, -53, -53, -53, -53, -53, -53, -53, -53, - -53, -53, -53, -53, -53, -53, -53 + -53, -53, -53, -53, -53, -53, -53, -53, -53 }, { - 11, 54, 54, -54, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 54, 54, 54 + 11, -54, -54, 55, -54, -54, -54, -54, -54, -54, + -54, -54, -54, -54, -54, -54, -54, -54, -54 }, { 11, -55, -55, -55, -55, -55, -55, -55, -55, -55, - -55, -55, -55, -55, -55, -55, -55 + -55, -55, -55, -55, -55, -55, -55, -55, -55 }, { - 11, -56, -56, -56, -56, -56, -56, -56, -56, -56, - -56, 60, 57, 57, -56, -56, -56 + 11, -56, 56, -56, -56, -56, -56, -56, -56, -56, + -56, -56, -56, -56, -56, -56, -56, -56, -56 }, { 11, -57, -57, -57, -57, -57, -57, -57, -57, -57, - -57, 57, 57, 57, -57, -57, -57 + -57, -57, -57, -57, -57, -57, -57, -57, -57 }, { - 11, -58, -58, -58, -58, -58, -58, -58, -58, -58, - -58, -58, -58, -58, -58, -58, -58 + 11, 58, 58, -58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58 }, { 11, -59, -59, -59, -59, -59, -59, -59, -59, -59, - -59, -59, -59, -59, -59, -59, -59 + -59, -59, -59, -59, -59, -59, -59, -59, -59 }, { 11, -60, -60, -60, -60, -60, -60, -60, -60, -60, - -60, 57, 57, 57, -60, -60, -60 + -60, 66, 61, 61, -60, -60, -60, -60, -60 + }, + + { + 11, -61, -61, -61, -61, -61, -61, -61, -61, -61, + -61, 61, 61, 61, -61, -61, -61, -61, -61 + }, + + { + 11, -62, -62, -62, -62, -62, -62, -62, -62, -62, + -62, -62, -62, -62, -62, -62, -62, -62, -62 + }, + + { + 11, -63, -63, -63, -63, -63, -63, -63, -63, -63, + -63, -63, -63, -63, -63, -63, -63, -63, -63 + }, + + { + 11, -64, -64, -64, -64, -64, -64, -64, -64, -64, + -64, -64, -64, -64, -64, -64, -64, -64, -64 + + }, + + { + 11, -65, -65, -65, -65, -65, -65, -65, -65, -65, + -65, -65, -65, -65, -65, -65, -65, -65, -65 + }, + + { + 11, -66, -66, -66, -66, -66, -66, -66, -66, -66, + -66, 61, 61, 61, -66, -66, -66, -66, -66 }, } ; @@ -701,8 +732,8 @@ static void yy_fatal_error (yyconst char msg[] ); *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; -#define YY_NUM_RULES 33 -#define YY_END_OF_BUFFER 34 +#define YY_NUM_RULES 38 +#define YY_END_OF_BUFFER 39 /* This struct is not used in this scanner, but its presence is necessary. */ struct yy_trans_info @@ -710,14 +741,15 @@ struct yy_trans_info flex_int32_t yy_verify; flex_int32_t yy_nxt; }; -static yyconst flex_int16_t yy_accept[61] = +static yyconst flex_int16_t yy_accept[67] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 34, 5, 4, 2, 3, 7, 8, 6, 32, 29, - 31, 24, 28, 27, 26, 22, 17, 13, 16, 20, - 22, 11, 12, 19, 19, 14, 22, 22, 4, 2, - 3, 3, 1, 6, 32, 29, 31, 30, 24, 23, - 26, 25, 15, 20, 9, 19, 19, 21, 10, 18 + 39, 5, 4, 2, 3, 7, 8, 6, 37, 34, + 36, 29, 33, 32, 31, 27, 26, 21, 13, 20, + 24, 27, 11, 12, 23, 23, 18, 14, 19, 27, + 27, 4, 2, 3, 3, 1, 6, 37, 34, 36, + 35, 29, 28, 31, 30, 26, 15, 24, 9, 23, + 23, 16, 17, 25, 10, 22 } ; static yyconst flex_int32_t yy_ec[256] = @@ -727,15 +759,15 @@ static yyconst flex_int32_t yy_ec[256] = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 5, 6, 1, 1, 7, 8, 9, 10, 1, 1, 1, 11, 12, 12, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 1, 1, 1, - 14, 1, 1, 1, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 1, 1, 14, + 15, 16, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 1, 15, 1, 1, 13, 1, 13, 13, 13, 13, + 1, 17, 1, 1, 13, 1, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 1, 16, 1, 1, 1, 1, 1, 1, + 13, 13, 1, 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -920,7 +952,7 @@ static int input (void ); /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ -#define ECHO do { if (fwrite( zconftext, zconfleng, 1, zconfout )) {} } while (0) +#define ECHO fwrite( zconftext, zconfleng, 1, zconfout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -1142,22 +1174,38 @@ return T_UNEQUAL; YY_BREAK case 16: YY_RULE_SETUP +return T_LESS_EQUAL; + YY_BREAK +case 17: +YY_RULE_SETUP +return T_GREATER_EQUAL; + YY_BREAK +case 18: +YY_RULE_SETUP +return T_LESS; + YY_BREAK +case 19: +YY_RULE_SETUP +return T_GREATER; + YY_BREAK +case 20: +YY_RULE_SETUP { str = zconftext[0]; new_string(); BEGIN(STRING); } YY_BREAK -case 17: -/* rule 17 can match eol */ +case 21: +/* rule 21 can match eol */ YY_RULE_SETUP BEGIN(INITIAL); current_file->lineno++; return T_EOL; YY_BREAK -case 18: +case 22: YY_RULE_SETUP /* ignore */ YY_BREAK -case 19: +case 23: YY_RULE_SETUP { const struct kconf_id *id = kconf_id_lookup(zconftext, zconfleng); @@ -1170,27 +1218,35 @@ YY_RULE_SETUP return T_WORD; } YY_BREAK -case 20: +case 24: YY_RULE_SETUP /* comment */ YY_BREAK -case 21: -/* rule 21 can match eol */ +case 25: +/* rule 25 can match eol */ YY_RULE_SETUP current_file->lineno++; YY_BREAK -case 22: +case 26: YY_RULE_SETUP YY_BREAK +case 27: +YY_RULE_SETUP +{ + fprintf(stderr, + "%s:%d:warning: ignoring unsupported character '%c'\n", + zconf_curname(), zconf_lineno(), *zconftext); + } + YY_BREAK case YY_STATE_EOF(PARAM): { BEGIN(INITIAL); } YY_BREAK -case 23: -/* rule 23 can match eol */ +case 28: +/* rule 28 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1201,14 +1257,14 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 24: +case 29: YY_RULE_SETUP { append_string(zconftext, zconfleng); } YY_BREAK -case 25: -/* rule 25 can match eol */ +case 30: +/* rule 30 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1219,13 +1275,13 @@ YY_RULE_SETUP return T_WORD_QUOTE; } YY_BREAK -case 26: +case 31: YY_RULE_SETUP { append_string(zconftext + 1, zconfleng - 1); } YY_BREAK -case 27: +case 32: YY_RULE_SETUP { if (str == zconftext[0]) { @@ -1236,8 +1292,8 @@ YY_RULE_SETUP append_string(zconftext, 1); } YY_BREAK -case 28: -/* rule 28 can match eol */ +case 33: +/* rule 33 can match eol */ YY_RULE_SETUP { printf("%s:%d:warning: multi-line strings not supported\n", zconf_curname(), zconf_lineno()); @@ -1252,7 +1308,7 @@ case YY_STATE_EOF(STRING): } YY_BREAK -case 29: +case 34: YY_RULE_SETUP { ts = 0; @@ -1277,8 +1333,8 @@ YY_RULE_SETUP } } YY_BREAK -case 30: -/* rule 30 can match eol */ +case 35: +/* rule 35 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up zconftext */ (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up zconftext again */ @@ -1289,15 +1345,15 @@ YY_RULE_SETUP return T_HELPTEXT; } YY_BREAK -case 31: -/* rule 31 can match eol */ +case 36: +/* rule 36 can match eol */ YY_RULE_SETUP { current_file->lineno++; append_string("\n", 1); } YY_BREAK -case 32: +case 37: YY_RULE_SETUP { while (zconfleng) { @@ -1328,7 +1384,7 @@ case YY_STATE_EOF(COMMAND): yyterminate(); } YY_BREAK -case 33: +case 38: YY_RULE_SETUP YY_FATAL_ERROR( "flex scanner jammed" ); YY_BREAK diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped index de5e84ed3f96..7a4d658c2066 100644 --- a/scripts/kconfig/zconf.tab.c_shipped +++ b/scripts/kconfig/zconf.tab.c_shipped @@ -1,8 +1,8 @@ -/* A Bison parser, made by GNU Bison 2.5. */ +/* A Bison parser, made by GNU Bison 2.5.1. */ /* Bison implementation for Yacc-like parsers in C - Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. + Copyright (C) 1984, 1989-1990, 2000-2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ #define YYBISON 1 /* Bison version. */ -#define YYBISON_VERSION "2.5" +#define YYBISON_VERSION "2.5.1" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -108,6 +108,14 @@ static struct menu *current_menu, *current_entry; +# ifndef YY_NULL +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULL nullptr +# else +# define YY_NULL 0 +# endif +# endif + /* Enabling traces. */ #ifndef YYDEBUG # define YYDEBUG 1 @@ -159,13 +167,17 @@ static struct menu *current_menu, *current_entry; T_WORD = 281, T_WORD_QUOTE = 282, T_UNEQUAL = 283, - T_CLOSE_PAREN = 284, - T_OPEN_PAREN = 285, - T_EOL = 286, - T_OR = 287, - T_AND = 288, - T_EQUAL = 289, - T_NOT = 290 + T_LESS = 284, + T_LESS_EQUAL = 285, + T_GREATER = 286, + T_GREATER_EQUAL = 287, + T_CLOSE_PAREN = 288, + T_OPEN_PAREN = 289, + T_EOL = 290, + T_OR = 291, + T_AND = 292, + T_EQUAL = 293, + T_NOT = 294 }; #endif @@ -304,6 +316,7 @@ YYID (yyi) # if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ || defined __cplusplus || defined _MSC_VER) # include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ # ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 # endif @@ -395,20 +408,20 @@ union yyalloc #endif #if defined YYCOPY_NEEDED && YYCOPY_NEEDED -/* Copy COUNT objects from FROM to TO. The source and destination do +/* Copy COUNT objects from SRC to DST. The source and destination do not overlap. */ # ifndef YYCOPY # if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) # else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ while (YYID (0)) # endif # endif @@ -417,20 +430,20 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 11 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 290 +#define YYLAST 298 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 36 +#define YYNTOKENS 40 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 50 /* YYNRULES -- Number of rules. */ -#define YYNRULES 118 +#define YYNRULES 122 /* YYNRULES -- Number of states. */ -#define YYNSTATES 191 +#define YYNSTATES 199 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ #define YYUNDEFTOK 2 -#define YYMAXUTOK 290 +#define YYMAXUTOK 294 #define YYTRANSLATE(YYX) \ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) @@ -467,7 +480,7 @@ static const yytype_uint8 yytranslate[] = 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35 + 35, 36, 37, 38, 39 }; #if YYDEBUG @@ -486,64 +499,67 @@ static const yytype_uint16 yyprhs[] = 235, 238, 241, 244, 248, 252, 255, 258, 261, 262, 265, 268, 271, 276, 277, 280, 283, 286, 287, 290, 292, 294, 297, 300, 303, 305, 308, 309, 312, 314, - 318, 322, 326, 329, 333, 337, 339, 341, 342 + 318, 322, 326, 330, 334, 338, 342, 345, 349, 353, + 355, 357, 358 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. */ static const yytype_int8 yyrhs[] = { - 37, 0, -1, 81, 38, -1, 38, -1, 63, 39, - -1, 39, -1, -1, 39, 41, -1, 39, 55, -1, - 39, 67, -1, 39, 80, -1, 39, 26, 1, 31, - -1, 39, 40, 1, 31, -1, 39, 1, 31, -1, + 41, 0, -1, 85, 42, -1, 42, -1, 67, 43, + -1, 43, -1, -1, 43, 45, -1, 43, 59, -1, + 43, 71, -1, 43, 84, -1, 43, 26, 1, 35, + -1, 43, 44, 1, 35, -1, 43, 1, 35, -1, 16, -1, 18, -1, 19, -1, 21, -1, 17, -1, - 22, -1, 20, -1, 23, -1, 31, -1, 61, -1, - 71, -1, 44, -1, 46, -1, 69, -1, 26, 1, - 31, -1, 1, 31, -1, 10, 26, 31, -1, 43, - 47, -1, 11, 26, 31, -1, 45, 47, -1, -1, - 47, 48, -1, 47, 49, -1, 47, 75, -1, 47, - 73, -1, 47, 42, -1, 47, 31, -1, 19, 78, - 31, -1, 18, 79, 82, 31, -1, 20, 83, 82, - 31, -1, 21, 26, 82, 31, -1, 22, 84, 84, - 82, 31, -1, 24, 50, 31, -1, -1, 50, 26, - 51, -1, -1, 34, 79, -1, 7, 85, 31, -1, - 52, 56, -1, 80, -1, 53, 58, 54, -1, -1, - 56, 57, -1, 56, 75, -1, 56, 73, -1, 56, - 31, -1, 56, 42, -1, 18, 79, 82, 31, -1, - 19, 78, 31, -1, 17, 31, -1, 20, 26, 82, - 31, -1, -1, 58, 41, -1, 14, 83, 81, -1, - 80, -1, 59, 62, 60, -1, -1, 62, 41, -1, - 62, 67, -1, 62, 55, -1, 3, 79, 81, -1, - 4, 79, 31, -1, 64, 76, 74, -1, 80, -1, - 65, 68, 66, -1, -1, 68, 41, -1, 68, 67, - -1, 68, 55, -1, 6, 79, 31, -1, 9, 79, - 31, -1, 70, 74, -1, 12, 31, -1, 72, 13, - -1, -1, 74, 75, -1, 74, 31, -1, 74, 42, - -1, 16, 25, 83, 31, -1, -1, 76, 77, -1, - 76, 31, -1, 23, 82, -1, -1, 79, 82, -1, - 26, -1, 27, -1, 5, 31, -1, 8, 31, -1, - 15, 31, -1, 31, -1, 81, 31, -1, -1, 14, - 83, -1, 84, -1, 84, 34, 84, -1, 84, 28, - 84, -1, 30, 83, 29, -1, 35, 83, -1, 83, - 32, 83, -1, 83, 33, 83, -1, 26, -1, 27, - -1, -1, 26, -1 + 22, -1, 20, -1, 23, -1, 35, -1, 65, -1, + 75, -1, 48, -1, 50, -1, 73, -1, 26, 1, + 35, -1, 1, 35, -1, 10, 26, 35, -1, 47, + 51, -1, 11, 26, 35, -1, 49, 51, -1, -1, + 51, 52, -1, 51, 53, -1, 51, 79, -1, 51, + 77, -1, 51, 46, -1, 51, 35, -1, 19, 82, + 35, -1, 18, 83, 86, 35, -1, 20, 87, 86, + 35, -1, 21, 26, 86, 35, -1, 22, 88, 88, + 86, 35, -1, 24, 54, 35, -1, -1, 54, 26, + 55, -1, -1, 38, 83, -1, 7, 89, 35, -1, + 56, 60, -1, 84, -1, 57, 62, 58, -1, -1, + 60, 61, -1, 60, 79, -1, 60, 77, -1, 60, + 35, -1, 60, 46, -1, 18, 83, 86, 35, -1, + 19, 82, 35, -1, 17, 35, -1, 20, 26, 86, + 35, -1, -1, 62, 45, -1, 14, 87, 85, -1, + 84, -1, 63, 66, 64, -1, -1, 66, 45, -1, + 66, 71, -1, 66, 59, -1, 3, 83, 85, -1, + 4, 83, 35, -1, 68, 80, 78, -1, 84, -1, + 69, 72, 70, -1, -1, 72, 45, -1, 72, 71, + -1, 72, 59, -1, 6, 83, 35, -1, 9, 83, + 35, -1, 74, 78, -1, 12, 35, -1, 76, 13, + -1, -1, 78, 79, -1, 78, 35, -1, 78, 46, + -1, 16, 25, 87, 35, -1, -1, 80, 81, -1, + 80, 35, -1, 23, 86, -1, -1, 83, 86, -1, + 26, -1, 27, -1, 5, 35, -1, 8, 35, -1, + 15, 35, -1, 35, -1, 85, 35, -1, -1, 14, + 87, -1, 88, -1, 88, 29, 88, -1, 88, 30, + 88, -1, 88, 31, 88, -1, 88, 32, 88, -1, + 88, 38, 88, -1, 88, 28, 88, -1, 34, 87, + 33, -1, 39, 87, -1, 87, 36, 87, -1, 87, + 37, 87, -1, 26, -1, 27, -1, -1, 26, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 103, 103, 103, 105, 105, 107, 109, 110, 111, - 112, 113, 114, 118, 122, 122, 122, 122, 122, 122, - 122, 122, 126, 127, 128, 129, 130, 131, 135, 136, - 142, 150, 156, 164, 174, 176, 177, 178, 179, 180, - 181, 184, 192, 198, 208, 214, 220, 223, 225, 236, - 237, 242, 251, 256, 264, 267, 269, 270, 271, 272, - 273, 276, 282, 293, 299, 309, 311, 316, 324, 332, - 335, 337, 338, 339, 344, 351, 358, 363, 371, 374, - 376, 377, 378, 381, 389, 396, 403, 409, 416, 418, - 419, 420, 423, 431, 433, 434, 437, 444, 446, 451, - 452, 455, 456, 457, 461, 462, 465, 466, 469, 470, - 471, 472, 473, 474, 475, 478, 479, 482, 483 + 0, 108, 108, 108, 110, 110, 112, 114, 115, 116, + 117, 118, 119, 123, 127, 127, 127, 127, 127, 127, + 127, 127, 131, 132, 133, 134, 135, 136, 140, 141, + 147, 155, 161, 169, 179, 181, 182, 183, 184, 185, + 186, 189, 197, 203, 213, 219, 225, 228, 230, 241, + 242, 247, 256, 261, 269, 272, 274, 275, 276, 277, + 278, 281, 287, 298, 304, 314, 316, 321, 329, 337, + 340, 342, 343, 344, 349, 356, 363, 368, 376, 379, + 381, 382, 383, 386, 394, 401, 408, 414, 421, 423, + 424, 425, 428, 436, 438, 439, 442, 449, 451, 456, + 457, 460, 461, 462, 466, 467, 470, 471, 474, 475, + 476, 477, 478, 479, 480, 481, 482, 483, 484, 487, + 488, 491, 492 }; #endif @@ -557,6 +573,7 @@ static const char *const yytname[] = "T_MENUCONFIG", "T_HELP", "T_HELPTEXT", "T_IF", "T_ENDIF", "T_DEPENDS", "T_OPTIONAL", "T_PROMPT", "T_TYPE", "T_DEFAULT", "T_SELECT", "T_RANGE", "T_VISIBLE", "T_OPTION", "T_ON", "T_WORD", "T_WORD_QUOTE", "T_UNEQUAL", + "T_LESS", "T_LESS_EQUAL", "T_GREATER", "T_GREATER_EQUAL", "T_CLOSE_PAREN", "T_OPEN_PAREN", "T_EOL", "T_OR", "T_AND", "T_EQUAL", "T_NOT", "$accept", "input", "start", "stmt_list", "option_name", "common_stmt", "option_error", "config_entry_start", "config_stmt", @@ -568,7 +585,7 @@ static const char *const yytname[] = "menu_entry", "menu_end", "menu_stmt", "menu_block", "source_stmt", "comment", "comment_stmt", "help_start", "help", "depends_list", "depends", "visibility_list", "visible", "prompt_stmt_opt", "prompt", - "end", "nl", "if_expr", "expr", "symbol", "word_opt", 0 + "end", "nl", "if_expr", "expr", "symbol", "word_opt", YY_NULL }; #endif @@ -580,25 +597,26 @@ static const yytype_uint16 yytoknum[] = 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290 + 285, 286, 287, 288, 289, 290, 291, 292, 293, 294 }; # endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const yytype_uint8 yyr1[] = { - 0, 36, 37, 37, 38, 38, 39, 39, 39, 39, - 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, - 40, 40, 41, 41, 41, 41, 41, 41, 42, 42, - 43, 44, 45, 46, 47, 47, 47, 47, 47, 47, - 47, 48, 48, 48, 48, 48, 49, 50, 50, 51, - 51, 52, 53, 54, 55, 56, 56, 56, 56, 56, - 56, 57, 57, 57, 57, 58, 58, 59, 60, 61, - 62, 62, 62, 62, 63, 64, 65, 66, 67, 68, - 68, 68, 68, 69, 70, 71, 72, 73, 74, 74, - 74, 74, 75, 76, 76, 76, 77, 78, 78, 79, - 79, 80, 80, 80, 81, 81, 82, 82, 83, 83, - 83, 83, 83, 83, 83, 84, 84, 85, 85 + 0, 40, 41, 41, 42, 42, 43, 43, 43, 43, + 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, + 44, 44, 45, 45, 45, 45, 45, 45, 46, 46, + 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, + 51, 52, 52, 52, 52, 52, 53, 54, 54, 55, + 55, 56, 57, 58, 59, 60, 60, 60, 60, 60, + 60, 61, 61, 61, 61, 62, 62, 63, 64, 65, + 66, 66, 66, 66, 67, 68, 69, 70, 71, 72, + 72, 72, 72, 73, 74, 75, 76, 77, 78, 78, + 78, 78, 79, 80, 80, 80, 81, 82, 82, 83, + 83, 84, 84, 84, 85, 85, 86, 86, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, + 88, 89, 89 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ @@ -615,7 +633,8 @@ static const yytype_uint8 yyr2[] = 2, 2, 2, 3, 3, 2, 2, 2, 0, 2, 2, 2, 4, 0, 2, 2, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 1, 3, - 3, 3, 2, 3, 3, 1, 1, 0, 1 + 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, + 1, 0, 1 }; /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. @@ -624,72 +643,72 @@ static const yytype_uint8 yyr2[] = static const yytype_uint8 yydefact[] = { 6, 0, 104, 0, 3, 0, 6, 6, 99, 100, - 0, 1, 0, 0, 0, 0, 117, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 0, 14, 18, 15, 16, 20, 17, 19, 21, 0, 22, 0, 7, 34, 25, 34, 26, 55, 65, 8, 70, 23, 93, 79, 9, 27, 88, 24, - 10, 0, 105, 2, 74, 13, 0, 101, 0, 118, - 0, 102, 0, 0, 0, 115, 116, 0, 0, 0, + 10, 0, 105, 2, 74, 13, 0, 101, 0, 122, + 0, 102, 0, 0, 0, 119, 120, 0, 0, 0, 108, 103, 0, 0, 0, 0, 0, 0, 0, 88, - 0, 0, 75, 83, 51, 84, 30, 32, 0, 112, - 0, 0, 67, 0, 0, 11, 12, 0, 0, 0, - 0, 97, 0, 0, 0, 47, 0, 40, 39, 35, - 36, 0, 38, 37, 0, 0, 97, 0, 59, 60, - 56, 58, 57, 66, 54, 53, 71, 73, 69, 72, - 68, 106, 95, 0, 94, 80, 82, 78, 81, 77, - 90, 91, 89, 111, 113, 114, 110, 109, 29, 86, - 0, 106, 0, 106, 106, 106, 0, 0, 0, 87, - 63, 106, 0, 106, 0, 96, 0, 0, 41, 98, - 0, 0, 106, 49, 46, 28, 0, 62, 0, 107, - 92, 42, 43, 44, 0, 0, 48, 61, 64, 45, - 50 + 0, 0, 75, 83, 51, 84, 30, 32, 0, 116, + 0, 0, 67, 0, 0, 0, 0, 0, 0, 11, + 12, 0, 0, 0, 0, 97, 0, 0, 0, 47, + 0, 40, 39, 35, 36, 0, 38, 37, 0, 0, + 97, 0, 59, 60, 56, 58, 57, 66, 54, 53, + 71, 73, 69, 72, 68, 106, 95, 0, 94, 80, + 82, 78, 81, 77, 90, 91, 89, 115, 117, 118, + 114, 109, 110, 111, 112, 113, 29, 86, 0, 106, + 0, 106, 106, 106, 0, 0, 0, 87, 63, 106, + 0, 106, 0, 96, 0, 0, 41, 98, 0, 0, + 106, 49, 46, 28, 0, 62, 0, 107, 92, 42, + 43, 44, 0, 0, 48, 61, 64, 45, 50 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { - -1, 3, 4, 5, 33, 34, 108, 35, 36, 37, - 38, 74, 109, 110, 157, 186, 39, 40, 124, 41, - 76, 120, 77, 42, 128, 43, 78, 6, 44, 45, - 137, 46, 80, 47, 48, 49, 111, 112, 81, 113, - 79, 134, 152, 153, 50, 7, 165, 69, 70, 60 + -1, 3, 4, 5, 33, 34, 112, 35, 36, 37, + 38, 74, 113, 114, 165, 194, 39, 40, 128, 41, + 76, 124, 77, 42, 132, 43, 78, 6, 44, 45, + 141, 46, 80, 47, 48, 49, 115, 116, 81, 117, + 79, 138, 160, 161, 50, 7, 173, 69, 70, 60 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -#define YYPACT_NINF -90 +#define YYPACT_NINF -91 static const yytype_int16 yypact[] = { - 4, 42, -90, 96, -90, 111, -90, 15, -90, -90, - 75, -90, 82, 42, 104, 42, 110, 107, 42, 115, - 125, -4, 121, -90, -90, -90, -90, -90, -90, -90, - -90, 162, -90, 163, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 139, -90, -90, 138, -90, 142, -90, 143, -90, - 152, -90, 164, 167, 168, -90, -90, -4, -4, 77, - -18, -90, 177, 185, 33, 71, 195, 247, 236, -2, - 236, 171, -90, -90, -90, -90, -90, -90, 41, -90, - -4, -4, 138, 97, 97, -90, -90, 186, 187, 194, - 42, 42, -4, 196, 97, -90, 219, -90, -90, -90, - -90, 210, -90, -90, 204, 42, 42, 199, -90, -90, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, 222, -90, 223, -90, -90, -90, -90, -90, -90, - -90, -90, -90, -90, 215, -90, -90, -90, -90, -90, - -4, 222, 228, 222, -5, 222, 97, 35, 229, -90, - -90, 222, 232, 222, -4, -90, 135, 233, -90, -90, - 234, 235, 222, 240, -90, -90, 237, -90, 239, -13, - -90, -90, -90, -90, 244, 42, -90, -90, -90, -90, - -90 + 19, 37, -91, 13, -91, 79, -91, 20, -91, -91, + -16, -91, 21, 37, 25, 37, 41, 36, 37, 78, + 83, 31, 56, -91, -91, -91, -91, -91, -91, -91, + -91, 116, -91, 127, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, 147, -91, -91, 105, -91, 109, -91, 111, -91, + 114, -91, 136, 137, 142, -91, -91, 31, 31, 76, + 254, -91, 143, 146, 27, 115, 207, 258, 243, -14, + 243, 179, -91, -91, -91, -91, -91, -91, -7, -91, + 31, 31, 105, 51, 51, 51, 51, 51, 51, -91, + -91, 156, 168, 181, 37, 37, 31, 178, 51, -91, + 206, -91, -91, -91, -91, 196, -91, -91, 175, 37, + 37, 185, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -91, -91, -91, -91, 214, -91, 230, -91, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 183, -91, + -91, -91, -91, -91, -91, -91, -91, -91, 31, 214, + 194, 214, 45, 214, 51, 26, 195, -91, -91, 214, + 197, 214, 31, -91, 139, 208, -91, -91, 220, 224, + 214, 222, -91, -91, 226, -91, 227, 123, -91, -91, + -91, -91, 235, 37, -91, -91, -91, -91, -91 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { - -90, -90, 269, 271, -90, 23, -70, -90, -90, -90, - -90, 243, -90, -90, -90, -90, -90, -90, -90, -48, - -90, -90, -90, -90, -90, -90, -90, -90, -90, -90, - -90, -20, -90, -90, -90, -90, -90, 206, 205, -68, - -90, -90, 169, -1, 27, -7, 118, -66, -89, -90 + -91, -91, 264, 268, -91, 30, -65, -91, -91, -91, + -91, 238, -91, -91, -91, -91, -91, -91, -91, -12, + -91, -91, -91, -91, -91, -91, -91, -91, -91, -91, + -91, -5, -91, -91, -91, -91, -91, 200, 209, -61, + -91, -91, 170, -1, 65, 0, 118, -66, -90, -91 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If @@ -698,102 +717,102 @@ static const yytype_int16 yypgoto[] = #define YYTABLE_NINF -86 static const yytype_int16 yytable[] = { - 10, 88, 89, 54, 146, 147, 119, 1, 122, 164, - 93, 141, 56, 142, 58, 156, 94, 62, 1, 90, - 91, 131, 65, 66, 144, 145, 67, 90, 91, 132, - 127, 68, 136, -31, 97, 2, 154, -31, -31, -31, - -31, -31, -31, -31, -31, 98, 52, -31, -31, 99, - -31, 100, 101, 102, 103, 104, -31, 105, 129, 106, - 138, 173, 92, 141, 107, 142, 174, 172, 8, 9, - 143, -33, 97, 90, 91, -33, -33, -33, -33, -33, - -33, -33, -33, 98, 166, -33, -33, 99, -33, 100, - 101, 102, 103, 104, -33, 105, 11, 106, 179, 151, - 123, 126, 107, 135, 125, 130, 2, 139, 2, 90, - 91, -5, 12, 55, 161, 13, 14, 15, 16, 17, - 18, 19, 20, 65, 66, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 57, 59, 31, 61, -4, - 12, 63, 32, 13, 14, 15, 16, 17, 18, 19, - 20, 64, 71, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 72, 73, 31, 180, 90, 91, 52, - 32, -85, 97, 82, 83, -85, -85, -85, -85, -85, - -85, -85, -85, 84, 190, -85, -85, 99, -85, -85, - -85, -85, -85, -85, -85, 85, 97, 106, 86, 87, - -52, -52, 140, -52, -52, -52, -52, 98, 95, -52, - -52, 99, 114, 115, 116, 117, 96, 148, 149, 150, - 158, 106, 155, 159, 97, 163, 118, -76, -76, -76, - -76, -76, -76, -76, -76, 160, 164, -76, -76, 99, - 13, 14, 15, 16, 17, 18, 19, 20, 91, 106, - 21, 22, 14, 15, 140, 17, 18, 19, 20, 168, - 175, 21, 22, 177, 181, 182, 183, 32, 187, 167, - 188, 169, 170, 171, 185, 189, 53, 51, 32, 176, - 75, 178, 121, 0, 133, 162, 0, 0, 0, 0, - 184 + 10, 88, 89, 150, 151, 152, 153, 154, 155, 135, + 54, 123, 56, 11, 58, 126, 145, 62, 164, 2, + 146, 136, 1, 1, 148, 149, 147, -31, 101, 90, + 91, -31, -31, -31, -31, -31, -31, -31, -31, 102, + 162, -31, -31, 103, -31, 104, 105, 106, 107, 108, + -31, 109, 181, 110, 2, 52, 55, 65, 66, 172, + 57, 182, 111, 8, 9, 67, 131, 59, 140, 92, + 68, 61, 145, 133, 180, 142, 146, 65, 66, -5, + 12, 90, 91, 13, 14, 15, 16, 17, 18, 19, + 20, 71, 174, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 159, 63, 31, 187, 127, 130, 64, + 139, 2, 90, 91, 32, -33, 101, 72, 169, -33, + -33, -33, -33, -33, -33, -33, -33, 102, 73, -33, + -33, 103, -33, 104, 105, 106, 107, 108, -33, 109, + 52, 110, 129, 134, 82, 143, 83, -4, 12, 84, + 111, 13, 14, 15, 16, 17, 18, 19, 20, 90, + 91, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 85, 86, 31, 188, 90, 91, 87, 99, -85, + 101, 100, 32, -85, -85, -85, -85, -85, -85, -85, + -85, 156, 198, -85, -85, 103, -85, -85, -85, -85, + -85, -85, -85, 157, 163, 110, 158, 166, 101, 167, + 168, 171, -52, -52, 144, -52, -52, -52, -52, 102, + 91, -52, -52, 103, 118, 119, 120, 121, 172, 176, + 183, 101, 185, 110, -76, -76, -76, -76, -76, -76, + -76, -76, 122, 189, -76, -76, 103, 13, 14, 15, + 16, 17, 18, 19, 20, 190, 110, 21, 22, 191, + 193, 195, 196, 14, 15, 144, 17, 18, 19, 20, + 197, 53, 21, 22, 51, 75, 125, 175, 32, 177, + 178, 179, 93, 94, 95, 96, 97, 184, 137, 186, + 170, 0, 98, 32, 0, 0, 0, 0, 192 }; #define yypact_value_is_default(yystate) \ - ((yystate) == (-90)) + ((yystate) == (-91)) #define yytable_value_is_error(yytable_value) \ YYID (0) static const yytype_int16 yycheck[] = { - 1, 67, 68, 10, 93, 94, 76, 3, 76, 14, - 28, 81, 13, 81, 15, 104, 34, 18, 3, 32, - 33, 23, 26, 27, 90, 91, 30, 32, 33, 31, - 78, 35, 80, 0, 1, 31, 102, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 31, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 78, 26, - 80, 26, 69, 133, 31, 133, 31, 156, 26, 27, - 29, 0, 1, 32, 33, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 150, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 0, 26, 164, 100, - 77, 78, 31, 80, 77, 78, 31, 80, 31, 32, - 33, 0, 1, 31, 115, 4, 5, 6, 7, 8, - 9, 10, 11, 26, 27, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 26, 26, 31, 0, - 1, 26, 31, 4, 5, 6, 7, 8, 9, 10, - 11, 26, 31, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 1, 1, 26, 31, 32, 33, 31, - 31, 0, 1, 31, 31, 4, 5, 6, 7, 8, - 9, 10, 11, 31, 185, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 31, 1, 26, 31, 31, - 5, 6, 31, 8, 9, 10, 11, 12, 31, 14, - 15, 16, 17, 18, 19, 20, 31, 31, 31, 25, - 1, 26, 26, 13, 1, 26, 31, 4, 5, 6, - 7, 8, 9, 10, 11, 31, 14, 14, 15, 16, - 4, 5, 6, 7, 8, 9, 10, 11, 33, 26, - 14, 15, 5, 6, 31, 8, 9, 10, 11, 31, - 31, 14, 15, 31, 31, 31, 31, 31, 31, 151, - 31, 153, 154, 155, 34, 31, 7, 6, 31, 161, - 37, 163, 76, -1, 79, 116, -1, -1, -1, -1, - 172 + 1, 67, 68, 93, 94, 95, 96, 97, 98, 23, + 10, 76, 13, 0, 15, 76, 81, 18, 108, 35, + 81, 35, 3, 3, 90, 91, 33, 0, 1, 36, + 37, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 106, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 26, 26, 35, 35, 35, 26, 27, 14, + 35, 35, 35, 26, 27, 34, 78, 26, 80, 69, + 39, 35, 137, 78, 164, 80, 137, 26, 27, 0, + 1, 36, 37, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 158, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 104, 26, 26, 172, 77, 78, 26, + 80, 35, 36, 37, 35, 0, 1, 1, 119, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 1, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 35, 26, 77, 78, 35, 80, 35, 0, 1, 35, + 35, 4, 5, 6, 7, 8, 9, 10, 11, 36, + 37, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 35, 35, 26, 35, 36, 37, 35, 35, 0, + 1, 35, 35, 4, 5, 6, 7, 8, 9, 10, + 11, 35, 193, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 35, 26, 26, 25, 1, 1, 13, + 35, 26, 5, 6, 35, 8, 9, 10, 11, 12, + 37, 14, 15, 16, 17, 18, 19, 20, 14, 35, + 35, 1, 35, 26, 4, 5, 6, 7, 8, 9, + 10, 11, 35, 35, 14, 15, 16, 4, 5, 6, + 7, 8, 9, 10, 11, 35, 26, 14, 15, 35, + 38, 35, 35, 5, 6, 35, 8, 9, 10, 11, + 35, 7, 14, 15, 6, 37, 76, 159, 35, 161, + 162, 163, 28, 29, 30, 31, 32, 169, 79, 171, + 120, -1, 38, 35, -1, -1, -1, -1, 180 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_uint8 yystos[] = { - 0, 3, 31, 37, 38, 39, 63, 81, 26, 27, - 79, 0, 1, 4, 5, 6, 7, 8, 9, 10, + 0, 3, 35, 41, 42, 43, 67, 85, 26, 27, + 83, 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 26, 31, 40, 41, 43, 44, 45, 46, 52, - 53, 55, 59, 61, 64, 65, 67, 69, 70, 71, - 80, 39, 31, 38, 81, 31, 79, 31, 79, 26, - 85, 31, 79, 26, 26, 26, 27, 30, 35, 83, - 84, 31, 1, 1, 47, 47, 56, 58, 62, 76, - 68, 74, 31, 31, 31, 31, 31, 31, 83, 83, - 32, 33, 81, 28, 34, 31, 31, 1, 12, 16, - 18, 19, 20, 21, 22, 24, 26, 31, 42, 48, - 49, 72, 73, 75, 17, 18, 19, 20, 31, 42, - 57, 73, 75, 41, 54, 80, 41, 55, 60, 67, - 80, 23, 31, 74, 77, 41, 55, 66, 67, 80, - 31, 42, 75, 29, 83, 83, 84, 84, 31, 31, - 25, 79, 78, 79, 83, 26, 84, 50, 1, 13, - 31, 79, 78, 26, 14, 82, 83, 82, 31, 82, - 82, 82, 84, 26, 31, 31, 82, 31, 82, 83, - 31, 31, 31, 31, 82, 34, 51, 31, 31, 31, - 79 + 23, 26, 35, 44, 45, 47, 48, 49, 50, 56, + 57, 59, 63, 65, 68, 69, 71, 73, 74, 75, + 84, 43, 35, 42, 85, 35, 83, 35, 83, 26, + 89, 35, 83, 26, 26, 26, 27, 34, 39, 87, + 88, 35, 1, 1, 51, 51, 60, 62, 66, 80, + 72, 78, 35, 35, 35, 35, 35, 35, 87, 87, + 36, 37, 85, 28, 29, 30, 31, 32, 38, 35, + 35, 1, 12, 16, 18, 19, 20, 21, 22, 24, + 26, 35, 46, 52, 53, 76, 77, 79, 17, 18, + 19, 20, 35, 46, 61, 77, 79, 45, 58, 84, + 45, 59, 64, 71, 84, 23, 35, 78, 81, 45, + 59, 70, 71, 84, 35, 46, 79, 33, 87, 87, + 88, 88, 88, 88, 88, 88, 35, 35, 25, 83, + 82, 83, 87, 26, 88, 54, 1, 13, 35, 83, + 82, 26, 14, 86, 87, 86, 35, 86, 86, 86, + 88, 26, 35, 35, 86, 35, 86, 87, 35, 35, + 35, 35, 86, 38, 55, 35, 35, 35, 83 }; #define yyerrok (yyerrstatus = 0) @@ -823,17 +842,18 @@ static const yytype_uint8 yystos[] = #define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ yyerror (YY_("syntax error: cannot back up")); \ YYERROR; \ } \ @@ -928,6 +948,8 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep) YYSTYPE const * const yyvaluep; #endif { + FILE *yyo = yyoutput; + YYUSE (yyo); if (!yyvaluep) return; # ifdef YYPRINT @@ -1179,12 +1201,12 @@ static int yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, yytype_int16 *yyssp, int yytoken) { - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); + YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; /* Internationalized format string. */ - const char *yyformat = 0; + const char *yyformat = YY_NULL; /* Arguments of yyformat. */ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; /* Number of reported tokens (one for the "unexpected", one per @@ -1244,7 +1266,7 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, break; } yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]); if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) return 2; @@ -1329,7 +1351,7 @@ yydestruct (yymsg, yytype, yyvaluep) switch (yytype) { - case 53: /* "choice_entry" */ + case 57: /* "choice_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1339,7 +1361,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 59: /* "if_entry" */ + case 63: /* "if_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1349,7 +1371,7 @@ yydestruct (yymsg, yytype, yyvaluep) }; break; - case 65: /* "menu_entry" */ + case 69: /* "menu_entry" */ { fprintf(stderr, "%s:%d: missing end statement for this entry\n", @@ -1426,7 +1448,7 @@ yyparse () `yyss': related to states. `yyvs': related to semantic values. - Refer to the stacks thru separate pointers, to allow yyoverflow + Refer to the stacks through separate pointers, to allow yyoverflow to reallocate them elsewhere. */ /* The state stack. */ @@ -2012,46 +2034,66 @@ yyreduce: case 109: - { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 110: - { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } + { (yyval.expr) = expr_alloc_comp(E_LEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 111: - { (yyval.expr) = (yyvsp[(2) - (3)].expr); } + { (yyval.expr) = expr_alloc_comp(E_GTH, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 112: - { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_GEQ, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 113: - { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_EQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 114: - { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + { (yyval.expr) = expr_alloc_comp(E_UNEQUAL, (yyvsp[(1) - (3)].symbol), (yyvsp[(3) - (3)].symbol)); } break; case 115: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = (yyvsp[(2) - (3)].expr); } break; case 116: - { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + { (yyval.expr) = expr_alloc_one(E_NOT, (yyvsp[(2) - (2)].expr)); } break; case 117: + { (yyval.expr) = expr_alloc_two(E_OR, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 118: + + { (yyval.expr) = expr_alloc_two(E_AND, (yyvsp[(1) - (3)].expr), (yyvsp[(3) - (3)].expr)); } + break; + + case 119: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), 0); free((yyvsp[(1) - (1)].string)); } + break; + + case 120: + + { (yyval.symbol) = sym_lookup((yyvsp[(1) - (1)].string), SYMBOL_CONST); free((yyvsp[(1) - (1)].string)); } + break; + + case 121: + { (yyval.string) = NULL; } break; @@ -2243,7 +2285,7 @@ yyabortlab: yyresult = 1; goto yyreturn; -#if !defined(yyoverflow) || YYERROR_VERBOSE +#if !defined yyoverflow || YYERROR_VERBOSE /*-------------------------------------------------. | yyexhaustedlab -- memory exhaustion comes here. | `-------------------------------------------------*/ diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 0f683cfa53e9..71bf8bff696a 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -69,6 +69,10 @@ static struct menu *current_menu, *current_entry; %token <string> T_WORD %token <string> T_WORD_QUOTE %token T_UNEQUAL +%token T_LESS +%token T_LESS_EQUAL +%token T_GREATER +%token T_GREATER_EQUAL %token T_CLOSE_PAREN %token T_OPEN_PAREN %token T_EOL @@ -76,6 +80,7 @@ static struct menu *current_menu, *current_entry; %left T_OR %left T_AND %left T_EQUAL T_UNEQUAL +%left T_LESS T_LESS_EQUAL T_GREATER T_GREATER_EQUAL %nonassoc T_NOT %type <string> prompt @@ -467,6 +472,10 @@ if_expr: /* empty */ { $$ = NULL; } ; expr: symbol { $$ = expr_alloc_symbol($1); } + | symbol T_LESS symbol { $$ = expr_alloc_comp(E_LTH, $1, $3); } + | symbol T_LESS_EQUAL symbol { $$ = expr_alloc_comp(E_LEQ, $1, $3); } + | symbol T_GREATER symbol { $$ = expr_alloc_comp(E_GTH, $1, $3); } + | symbol T_GREATER_EQUAL symbol { $$ = expr_alloc_comp(E_GEQ, $1, $3); } | symbol T_EQUAL symbol { $$ = expr_alloc_comp(E_EQUAL, $1, $3); } | symbol T_UNEQUAL symbol { $$ = expr_alloc_comp(E_UNEQUAL, $1, $3); } | T_OPEN_PAREN expr T_CLOSE_PAREN { $$ = $2; } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 86a4fe75f453..1a10d8ac8162 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,7 +82,7 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then + if [ -n "${CONFIG_ARM}" ] && [ -z "${CONFIG_XIP_KERNEL}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" fi @@ -111,7 +111,6 @@ sortextable() } # Delete output files in case of error -trap cleanup SIGHUP SIGINT SIGQUIT SIGTERM ERR cleanup() { rm -f .old_version @@ -124,6 +123,20 @@ cleanup() rm -f vmlinux.o } +on_exit() +{ + if [ $? -ne 0 ]; then + cleanup + fi +} +trap on_exit EXIT + +on_signals() +{ + exit 1 +} +trap on_signals HUP INT QUIT TERM + # # # Use "make V=1" to debug this script @@ -231,7 +244,6 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then if ! cmp -s System.map .tmp_System.map; then echo >&2 Inconsistent kallsyms data echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround - cleanup exit 1 fi fi diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index eff7de1fc82e..e70fcd12eeeb 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -63,6 +63,8 @@ int main(void) DEVID(acpi_device_id); DEVID_FIELD(acpi_device_id, id); + DEVID_FIELD(acpi_device_id, cls); + DEVID_FIELD(acpi_device_id, cls_msk); DEVID(pnp_device_id); DEVID_FIELD(pnp_device_id, id); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 84c86f3cd6cd..5f2088209132 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -523,12 +523,40 @@ static int do_serio_entry(const char *filename, } ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); -/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */ +/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or + * "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if) + * + * NOTE: Each driver should use one of the following : _HID, _CIDs + * or _CLS. Also, bb, ss, and pp can be substituted with ?? + * as don't care byte. + */ static int do_acpi_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, acpi_device_id, id); - sprintf(alias, "acpi*:%s:*", *id); + DEF_FIELD_ADDR(symval, acpi_device_id, cls); + DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk); + + if (id && strlen((const char *)*id)) + sprintf(alias, "acpi*:%s:*", *id); + else if (cls) { + int i, byte_shift, cnt = 0; + unsigned int msk; + + sprintf(&alias[cnt], "acpi*:"); + cnt = 6; + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3-i); + msk = (*cls_msk >> byte_shift) & 0xFF; + if (msk) + sprintf(&alias[cnt], "%02x", + (*cls >> byte_shift) & 0xFF); + else + sprintf(&alias[cnt], "??"); + cnt += 2; + } + sprintf(&alias[cnt], ":*"); + } return 1; } ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 91ee1b2e0f9a..12d3db3bd46b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -886,7 +886,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ ".kprobes.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*" + ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".coldtext" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8..16622aef9bde 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,11 +25,6 @@ static struct vfsmount *mount; static int mount_count; -static inline int positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static int fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr files[] = {{""}}; @@ -201,33 +196,29 @@ void securityfs_remove(struct dentry *dentry) return; mutex_lock(&d_inode(parent)->i_mutex); - if (positive(dentry)) { - if (d_really_is_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); - dput(dentry); - } + if (simple_positive(dentry)) { + if (d_is_dir(dentry)) + simple_rmdir(d_inode(parent), dentry); + else + simple_unlink(d_inode(parent), dentry); + dput(dentry); } mutex_unlock(&d_inode(parent)->i_mutex); simple_release_fs(&mount, &mount_count); } EXPORT_SYMBOL_GPL(securityfs_remove); -static struct kobject *security_kobj; - static int __init securityfs_init(void) { int retval; - security_kobj = kobject_create_and_add("security", kernel_kobj); - if (!security_kobj) - return -EINVAL; + retval = sysfs_create_mount_point(kernel_kobj, "security"); + if (retval) + return retval; retval = register_filesystem(&fs_type); if (retval) - kobject_put(security_kobj); + sysfs_remove_mount_point(kernel_kobj, "security"); return retval; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 623108199641..564079c5c49d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3283,7 +3283,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d2787cca1fcb..3d2201413028 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { }; struct vfsmount *selinuxfs_mount; -static struct kobject *selinuxfs_kobj; static int __init init_sel_fs(void) { @@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) if (!selinux_enabled) return 0; - selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); - if (!selinuxfs_kobj) - return -ENOMEM; + err = sysfs_create_mount_point(fs_kobj, "selinux"); + if (err) + return err; err = register_filesystem(&sel_fs_type); if (err) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); return err; } @@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); #ifdef CONFIG_SECURITY_SELINUX_DISABLE void exit_sel_fs(void) { - kobject_put(selinuxfs_kobj); + sysfs_remove_mount_point(fs_kobj, "selinux"); kern_unmount(selinuxfs_mount); unregister_filesystem(&sel_fs_type); } diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec17..57644b1dc42e 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 5e0a64ebdf23..2716d02119f3 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2314,16 +2314,16 @@ static const struct file_operations smk_revoke_subj_ops = { .llseek = generic_file_llseek, }; -static struct kset *smackfs_kset; /** * smk_init_sysfs - initialize /sys/fs/smackfs * */ static int smk_init_sysfs(void) { - smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); - if (!smackfs_kset) - return -ENOMEM; + int err; + err = sysfs_create_mount_point(fs_kobj, "smackfs"); + if (err) + return err; return 0; } diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index a51244a8022f..faca2bf6a430 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -25,7 +25,7 @@ build-dir := $(srctree)/tools/build include $(build-dir)/Build.include # do not force detected configuration --include .config-detected +-include $(OUTPUT).config-detected # Init all relevant variables used in build files so # 1) they have correct type diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index f0e72674c52d..9098083869c8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -41,4 +41,62 @@ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include <linux/types.h> + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h deleted file mode 100644 index d07e586b9ba0..000000000000 --- a/tools/include/linux/export.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TOOLS_LINUX_EXPORT_H_ -#define _TOOLS_LINUX_EXPORT_H_ - -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) - -#endif diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h new file mode 100644 index 000000000000..112582253dd0 --- /dev/null +++ b/tools/include/linux/rbtree.h @@ -0,0 +1,104 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + +#ifndef __TOOLS_LINUX_PERF_RBTREE_H +#define __TOOLS_LINUX_PERF_RBTREE_H + +#include <linux/kernel.h> +#include <linux/stddef.h> + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + + +/* + * Handy for checking that we are not deleting an entry that is + * already in a list, found in block/{blk-throttle,cfq-iosched}.c, + * probably should be moved to lib/rbtree.c... + */ +static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} +#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h new file mode 100644 index 000000000000..43be941db695 --- /dev/null +++ b/tools/include/linux/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + tools/linux/include/linux/rbtree_augmented.h + + Copied from: + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H +#define _TOOLS_LINUX_RBTREE_AUGMENTED_H + +#include <linux/compiler.h> +#include <linux/rbtree.h> + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000000..17c2b596f043 --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include <linux/rbtree_augmented.h> + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. + */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod use --per-core in addition to -a. (system-wide). The output includes the core number and the number of online logical processors on that physical processor. +--per-thread:: +Aggregate counts per monitored threads, when monitoring threads (-t option) +or processes (-p option). + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring. This is useful to diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fe50a1b34aa0..09dc0aabb515 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c @@ -44,6 +45,8 @@ tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h +tools/include/linux/rbtree.h +tools/include/linux/rbtree_augmented.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h @@ -51,12 +54,10 @@ include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/linux/perf_event.h -include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/hweight.c -lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h @@ -65,7 +66,6 @@ arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h -include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h include/uapi/linux/const.h include/uapi/linux/swab.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d31a7bbd7cee..480546d5f13b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -83,8 +83,8 @@ build-test: # # All other targets get passed through: # -%: +%: FORCE $(print_msg) $(make) -.PHONY: tags TAGS +.PHONY: tags TAGS FORCE Makefile diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1af0cfeb7a57..7a4b549214e3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -110,7 +110,7 @@ $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld +LD ?= $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -545,7 +545,7 @@ config-clean: clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - $(Q)$(RM) .config-detected + $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 52ec66b23607..01b06492bd6a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -630,12 +630,13 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; - if (symbol__init(&inject.session->header.env) < 0) - return -1; + ret = symbol__init(&inject.session->header.env); + if (ret < 0) + goto out_delete; ret = __cmd_inject(&inject); +out_delete: perf_session__delete(inject.session); - return ret; } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 950f296dfcf7..23b1faaaa4cc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1916,7 +1916,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) if (!perf_evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) { pr_err(errmsg, "slab", "slab"); - return -1; + goto out_delete; } } @@ -1927,7 +1927,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) "kmem:mm_page_alloc"); if (evsel == NULL) { pr_err(errmsg, "page", "page"); - return -1; + goto out_delete; } kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 74878cd75078..fc1cffb1b7a2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1061,8 +1061,10 @@ static int read_events(struct perf_kvm_stat *kvm) symbol__init(&kvm->session->header.env); - if (!perf_session__has_traces(kvm->session, "kvm record")) - return -EINVAL; + if (!perf_session__has_traces(kvm->session, "kvm record")) { + ret = -EINVAL; + goto out_delete; + } /* * Do not use 'isa' recorded in kvm_exit tracepoint since it is not @@ -1070,9 +1072,13 @@ static int read_events(struct perf_kvm_stat *kvm) */ ret = cpu_isa_config(kvm); if (ret < 0) - return ret; + goto out_delete; - return perf_session__process_events(kvm->session); + ret = perf_session__process_events(kvm->session); + +out_delete: + perf_session__delete(kvm->session); + return ret; } static int parse_target_str(struct perf_kvm_stat *kvm) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index da2ec06f0742..80170aace5d4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -124,7 +124,6 @@ static int report_raw_events(struct perf_mem *mem) .mode = PERF_DATA_MODE_READ, .force = mem->force, }; - int err = -EINVAL; int ret; struct perf_session *session = perf_session__new(&file, false, &mem->tool); @@ -135,24 +134,21 @@ static int report_raw_events(struct perf_mem *mem) if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); - if (ret) + if (ret < 0) goto out_delete; } - if (symbol__init(&session->header.env) < 0) - return -1; + ret = symbol__init(&session->header.env); + if (ret < 0) + goto out_delete; printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session); - if (err) - return err; - - return 0; + ret = perf_session__process_events(session); out_delete: perf_session__delete(session); - return err; + return ret; } static int report_events(int argc, const char **argv, struct perf_mem *mem) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 32626ea3e227..95a47719aec3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -742,6 +742,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, report_usage, 0); + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + if (report.use_stdio) use_browser = 0; else if (report.use_tui) @@ -828,8 +839,10 @@ repeat: if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); - if (report.header_only) - return 0; + if (report.header_only) { + ret = 0; + goto error; + } } else if (use_browser == 0) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fcf99bdeb19e..37e301a32f43 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,10 +67,7 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" -static void print_stat(int argc, const char **argv); -static void print_counter_aggr(struct perf_evsel *counter, char *prefix); -static void print_counter(struct perf_evsel *counter, char *prefix); -static void print_aggr(char *prefix); +static void print_counters(struct timespec *ts, int argc, const char **argv); /* Default events used for perf stat -T */ static const char *transaction_attrs = { @@ -141,96 +138,9 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +static void perf_stat__reset_stats(void) { - return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; -} - -static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) -{ - return perf_evsel__cpus(evsel)->nr; -} - -static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) -{ - int i; - struct perf_stat *ps = evsel->priv; - - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); - - perf_stat_evsel_id_init(evsel); -} - -static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) -{ - evsel->priv = zalloc(sizeof(struct perf_stat)); - if (evsel->priv == NULL) - return -ENOMEM; - perf_evsel__reset_stat_priv(evsel); - return 0; -} - -static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) -{ - zfree(&evsel->priv); -} - -static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) -{ - struct perf_counts *counts; - - counts = perf_counts__new(perf_evsel__nr_cpus(evsel)); - if (counts) - evsel->prev_raw_counts = counts; - - return counts ? 0 : -ENOMEM; -} - -static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) -{ - perf_counts__delete(evsel->prev_raw_counts); - evsel->prev_raw_counts = NULL; -} - -static void perf_evlist__free_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__free_stat_priv(evsel); - perf_evsel__free_counts(evsel); - perf_evsel__free_prev_raw_counts(evsel); - } -} - -static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) - goto out_free; - } - - return 0; - -out_free: - perf_evlist__free_stats(evlist); - return -1; -} - -static void perf_stat__reset_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); - } - + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); } @@ -304,8 +214,9 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) return 0; } -static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, - struct perf_counts_values *count) +static int +process_counter_values(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; @@ -320,13 +231,13 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, count = &zero; switch (aggr_mode) { + case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, count); + perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - evsel->counts->cpu[cpu] = *count; if (aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; @@ -343,26 +254,48 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, return 0; } -static int read_counter(struct perf_evsel *counter); +static int process_counter_maps(struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; -/* - * Read out the results of a single counter: - * aggregate counts across CPUs in system-wide mode - */ -static int read_counter_aggr(struct perf_evsel *counter) + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +static int process_counter(struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; u64 *count = counter->counts->aggr.values; - int i; + int i, ret; aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); - if (read_counter(counter)) - return -1; + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(counter); + if (ret) + return ret; + + if (aggr_mode != AGGR_GLOBAL) + return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, aggr); + perf_evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -397,12 +330,12 @@ static int read_counter(struct perf_evsel *counter) if (counter->system_wide) nthreads = 1; - if (counter->per_pkg) - zero_per_pkg(counter); - for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) + struct perf_counts_values *count; + + count = perf_counts(counter->counts, cpu, thread); + if (perf_evsel__read(counter, cpu, thread, count)) return -1; } } @@ -410,68 +343,34 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void print_interval(void) +static void read_counters(bool close) { - static int num_print_interval; struct perf_evsel *counter; - struct perf_stat *ps; - struct timespec ts, rs; - char prefix[64]; - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter_aggr(counter); - } - } else { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter(counter); - } - } + evlist__for_each(evsel_list, counter) { + if (read_counter(counter)) + pr_warning("failed to read counter %s\n", counter->name); - clock_gettime(CLOCK_MONOTONIC, &ts); - diff_timespec(&rs, &ts, &ref_time); - sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); + if (process_counter(counter)) + pr_warning("failed to process counter %s\n", counter->name); - if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + if (close) { + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), + thread_map__nr(evsel_list->threads)); } } +} - if (++num_print_interval == 25) - num_print_interval = 0; +static void process_interval(void) +{ + struct timespec ts, rs; - switch (aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(prefix); - break; - case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); - break; - case AGGR_GLOBAL: - default: - evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, prefix); - } + read_counters(false); - fflush(output); + clock_gettime(CLOCK_MONOTONIC, &ts); + diff_timespec(&rs, &ts, &ref_time); + + print_counters(&rs, 0, NULL); } static void handle_initial_delay(void) @@ -586,7 +485,7 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); - print_interval(); + process_interval(); } } wait(&status); @@ -604,7 +503,7 @@ static int __run_perf_stat(int argc, const char **argv) while (!done) { nanosleep(&ts, NULL); if (interval) - print_interval(); + process_interval(); } } @@ -612,18 +511,7 @@ static int __run_perf_stat(int argc, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - read_counter_aggr(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } - } else { - evlist__for_each(evsel_list, counter) { - read_counter(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); - } - } + read_counters(true); return WEXITSTATUS(status); } @@ -715,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; + case AGGR_THREAD: + fprintf(output, "%*s-%*d%s", + csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + csv_output ? 0 : -8, + thread_map__pid(evsel->threads, id), + csv_sep); + break; case AGGR_GLOBAL: default: break; @@ -815,9 +711,9 @@ static void print_aggr(char *prefix) s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; - val += counter->counts->cpu[cpu].val; - ena += counter->counts->cpu[cpu].ena; - run += counter->counts->cpu[cpu].run; + val += perf_counts(counter->counts, cpu, 0)->val; + ena += perf_counts(counter->counts, cpu, 0)->ena; + run += perf_counts(counter->counts, cpu, 0)->run; nr++; } if (prefix) @@ -863,6 +759,40 @@ static void print_aggr(char *prefix) } } +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int cpu, thread; + double uval; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + + if (nsec_counter(counter)) + nsec_printout(thread, 0, counter, uval); + else + abs_printout(thread, 0, counter, uval); + + if (!csv_output) + print_noise(counter, 1.0); + + print_running(run, ena); + fputc('\n', output); + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -925,9 +855,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = counter->counts->cpu[cpu].val; - ena = counter->counts->cpu[cpu].ena; - run = counter->counts->cpu[cpu].run; + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; if (prefix) fprintf(output, "%s", prefix); @@ -972,9 +902,38 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } -static void print_stat(int argc, const char **argv) +static void print_interval(char *prefix, struct timespec *ts) +{ + static int num_print_interval; + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + + if (num_print_interval == 0 && !csv_output) { + switch (aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + } + } + + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(int argc, const char **argv) { - struct perf_evsel *counter; int i; fflush(stdout); @@ -1000,36 +959,57 @@ static void print_stat(int argc, const char **argv) fprintf(output, " (%d runs)", run_count); fprintf(output, ":\n\n"); } +} + +static void print_footer(void) +{ + if (!null_run) + fprintf(output, "\n"); + fprintf(output, " %17.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(output, " "); + print_noise_pct(stddev_stats(&walltime_nsecs_stats), + avg_stats(&walltime_nsecs_stats)); + } + fprintf(output, "\n\n"); +} + +static void print_counters(struct timespec *ts, int argc, const char **argv) +{ + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(prefix = buf, ts); + else + print_header(argc, argv); switch (aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(NULL); + print_aggr(prefix); + break; + case AGGR_THREAD: + evlist__for_each(evsel_list, counter) + print_aggr_thread(counter, prefix); break; case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, NULL); + print_counter_aggr(counter, prefix); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) - print_counter(counter, NULL); + print_counter(counter, prefix); break; default: break; } - if (!csv_output) { - if (!null_run) - fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); - } - fprintf(output, "\n\n"); - } + if (!interval && !csv_output) + print_footer(); + + fflush(output); } static volatile int signr = -1; @@ -1101,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) break; case AGGR_NONE: case AGGR_GLOBAL: + case AGGR_THREAD: default: break; } @@ -1325,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-thread", &aggr_mode, + "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), OPT_END() @@ -1416,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - /* no_aggr, cgroup are for system-wide only */ - if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && + if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + fprintf(stderr, "The --per-thread option is only available " + "when monitoring via -p -t options.\n"); + parse_options_usage(NULL, options, "p", 1); + parse_options_usage(NULL, options, "t", 1); + goto out; + } + + /* + * no_aggr, cgroup are for system-wide only + * --per-thread is aggregated per thread, we dont mix it with cpu mode + */ + if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1445,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } goto out; } + + /* + * Initialize thread_map with comm names, + * so we could print it out on output. + */ + if (aggr_mode == AGGR_THREAD) + thread_map__read_comms(evsel_list->threads); + if (interval && interval < 100) { pr_err("print interval must be >= 100ms\n"); parse_options_usage(stat_usage, options, "I", 1); @@ -1478,13 +1480,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); if (forever && status != -1) { - print_stat(argc, argv); - perf_stat__reset_stats(evsel_list); + print_counters(NULL, argc, argv); + perf_stat__reset_stats(); } } if (!forever && status != -1 && !interval) - print_stat(argc, argv); + print_counters(NULL, argc, argv); perf_evlist__free_stats(evsel_list); out: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 619a8696fda7..ecf319728f25 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -586,27 +586,9 @@ static void *display_thread_tui(void *arg) hists->uid_filter_str = top->record_opts.target.uid_str; } - while (true) { - int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt, - top->min_percent, - &top->session->header.env); - - if (key != 'f') - break; - - perf_evlist__toggle_enable(top->evlist); - /* - * No need to refresh, resort/decay histogram entries - * if we are not collecting samples: - */ - if (top->evlist->enabled) { - hbt.refresh = top->delay_secs; - help = "Press 'f' to disable the events or 'h' to see other hotkeys"; - } else { - help = "Press 'f' again to re-enable the events"; - hbt.refresh = 0; - } - } + perf_evlist__tui_browse_hists(top->evlist, help, &hbt, + top->min_percent, + &top->session->header.env); done = 1; return NULL; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index de5d277d1ad7..39ad4d0ca884 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1617,6 +1617,34 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +static int trace__validate_ev_qualifier(struct trace *trace) +{ + int err = 0; + struct str_node *pos; + + strlist__for_each(pos, trace->ev_qualifier) { + const char *sc = pos->s; + + if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { + if (err == 0) { + fputs("Error:\tInvalid syscall ", trace->output); + err = -EINVAL; + } else { + fputs(", ", trace->output); + } + + fputs(sc, trace->output); + } + } + + if (err < 0) { + fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" + "\nHint:\tand: 'man syscalls'\n", trace->output); + } + + return err; +} + /* * args is to be interpreted as a series of longs but we need to handle * 8-byte unaligned accesses. args points to raw_data within the event @@ -2325,7 +2353,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) */ if (trace->filter_pids.nr > 0) err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); - else if (evlist->threads->map[0] == -1) + else if (thread_map__pid(evlist->threads, 0) == -1) err = perf_evlist__set_filter_pid(evlist, getpid()); if (err < 0) { @@ -2343,7 +2371,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (forks) perf_evlist__start_workload(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || + trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || evlist->threads->nr > 1 || perf_evlist__first(evlist)->attr.inherit; again: @@ -2862,6 +2890,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) err = -ENOMEM; goto out_close; } + + err = trace__validate_ev_qualifier(&trace); + if (err) + goto out_close; } err = target__validate(&trace.opts.target); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 317001c94660..094ddaee104c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,9 +11,9 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -$(shell echo -n > .config-detected) -detected = $(shell echo "$(1)=y" >> .config-detected) -detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) +$(shell echo -n > $(OUTPUT).config-detected) +detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index ee41e705b2eb..d20d6e6ab65b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,6 +31,7 @@ perf-y += code-reading.o perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o +perf-y += thread-map.o perf-$(CONFIG_X86) += perf-time-to-tsc.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 87b9961646e4..c1dde733c3a6 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -171,6 +171,10 @@ static struct test { .func = test__kmod_path__parse, }, { + .desc = "Test thread map", + .func = test__thread_map, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 22f8a00446e1..39c784a100a9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -545,8 +545,8 @@ out_err: if (evlist) { perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } machines__destroy_kernel_maps(&machines); machine__delete_threads(machine); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 5b171d1e338b..4d4b9837b630 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -144,8 +144,8 @@ out_err: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } return err; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 65280d28662e..729112f4cfaa 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -1,5 +1,16 @@ +ifndef MK +ifeq ($(MAKECMDGOALS),) +# no target specified, trigger the whole suite +all: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile + @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf +else +# run only specific test over 'Makefile' +%: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile $@ +endif +else PERF := . -MK := Makefile include config/Makefile.arch @@ -47,6 +58,7 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf +make_install_prefix := install prefix=/tmp/krava make_static := LDFLAGS=-static # all the NO_* variable combined @@ -57,7 +69,12 @@ make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 # $(run) contains all available tests run := make_pure +# Targets 'clean all' can be run together only through top level +# Makefile because we detect clean target in Makefile.perf and +# disable features detection +ifeq ($(MK),Makefile) run += make_clean_all +endif run += make_python_perf_so run += make_debug run += make_no_libperl @@ -83,6 +100,7 @@ run += make_util_map_o run += make_util_pmu_bison_o run += make_install run += make_install_bin +run += make_install_prefix # FIXME 'install-*' commented out till they're fixed # run += make_install_doc # run += make_install_man @@ -157,6 +175,12 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) +# We prefix all installed files for make_install_prefix +# with '/tmp/krava' to match installed/prefix-ed files. +installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) +test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) + # FIXME nothing gets installed test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 test_make_install_man_O := $(test_make_install_man) @@ -226,13 +250,13 @@ tarpkg: ( eval $$cmd ) >> $@ 2>&1 make_kernelsrc: - @echo " - make -C <kernelsrc> tools/perf" + @echo "- make -C <kernelsrc> tools/perf" $(call clean); \ (make -C ../.. tools/perf) > $@ 2>&1 && \ test -x perf && rm -f $@ || (cat $@ ; false) make_kernelsrc_tools: - @echo " - make -C <kernelsrc>/tools perf" + @echo "- make -C <kernelsrc>/tools perf" $(call clean); \ (make -C ../../tools perf) > $@ 2>&1 && \ test -x perf && rm -f $@ || (cat $@ ; false) @@ -244,3 +268,4 @@ out: $(run_O) @echo OK .PHONY: all $(run) $(run_O) tarpkg clean +endif # ifndef MK diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5855cf471210..666b67a4df9d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -140,8 +140,8 @@ out_delete_evlist: cpus = NULL; threads = NULL; out_free_cpus: - cpu_map__delete(cpus); + cpu_map__put(cpus); out_free_threads: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 7f48efa7e295..145050e2e544 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -143,7 +143,7 @@ static int synth_process(struct machine *machine) perf_event__process, machine, 0, 500); - thread_map__delete(map); + thread_map__put(map); return err; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 9a7a116e09b8..a572f87e9c8d 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -78,7 +78,7 @@ int test__openat_syscall_event_on_all_cpus(void) * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ - if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { + if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); goto out_close_fd; } @@ -98,9 +98,9 @@ int test__openat_syscall_event_on_all_cpus(void) } expected = nr_openat_calls + cpu; - if (evsel->counts->cpu[cpu].val != expected) { + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } @@ -111,6 +111,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 6245221479d7..01a19626c846 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -45,7 +45,7 @@ int test__syscall_openat_tp_fields(void) perf_evsel__config(evsel, &opts); - evlist->threads->map[0] = getpid(); + thread_map__set_pid(evlist->threads, 0, getpid()); err = perf_evlist__open(evlist); if (err < 0) { diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 9f9491bb8e48..c9a37bc6b33a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -44,9 +44,9 @@ int test__openat_syscall_event(void) goto out_close_fd; } - if (evsel->counts->cpu[0].val != nr_openat_calls) { + if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", - nr_openat_calls, evsel->counts->cpu[0].val); + nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } @@ -56,6 +56,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 0d31403ea593..e698742d4fec 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -560,8 +560,8 @@ out: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } return err; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8e5038b48ba8..ebb47d96bc0b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -61,6 +61,7 @@ int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); int test__kmod_path__parse(void); +int test__thread_map(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c new file mode 100644 index 000000000000..5acf000939ea --- /dev/null +++ b/tools/perf/tests/thread-map.c @@ -0,0 +1,38 @@ +#include <sys/types.h> +#include <unistd.h> +#include "tests.h" +#include "thread_map.h" +#include "debug.h" + +int test__thread_map(void) +{ + struct thread_map *map; + + /* test map on current pid */ + map = thread_map__new_by_pid(getpid()); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", + thread_map__pid(map, 0) == getpid()); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "perf")); + thread_map__put(map); + + /* test dummy pid */ + map = thread_map__new_dummy(); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "dummy")); + thread_map__put(map); + return 0; +} diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index c42adb600091..7629bef2fd79 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1902,8 +1902,23 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case CTRL('c'): goto out_free_stack; case 'f': - if (!is_report_browser(hbt)) - goto out_free_stack; + if (!is_report_browser(hbt)) { + struct perf_top *top = hbt->arg; + + perf_evlist__toggle_enable(top->evlist); + /* + * No need to refresh, resort/decay histogram + * entries if we are not collecting samples: + */ + if (top->evlist->enabled) { + helpline = "Press 'f' to disable the events or 'h' to see other hotkeys"; + hbt->refresh = delay_secs; + } else { + helpline = "Press 'f' again to re-enable the events"; + hbt->refresh = 0; + } + continue; + } /* Fall thru */ default: helpline = "Press '?' for help on key bindings"; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 586a59d46022..601d11440596 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -139,7 +139,7 @@ $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE +$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index df66966cfde7..7e7405c9b936 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -119,12 +119,12 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, if (per_cpu) { mp->cpu = evlist->cpus->map[idx]; if (evlist->threads) - mp->tid = evlist->threads->map[0]; + mp->tid = thread_map__pid(evlist->threads, 0); else mp->tid = -1; } else { mp->cpu = -1; - mp->tid = evlist->threads->map[idx]; + mp->tid = thread_map__pid(evlist->threads, idx); } } @@ -1182,6 +1182,13 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, data2 = NULL; } + if (itr->alignment) { + unsigned int unwanted = len1 % itr->alignment; + + len1 -= unwanted; + size -= unwanted; + } + /* padding must be written by fn() e.g. record__process_auxtrace() */ padding = size & 7; if (padding) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a171abbe7301..471aecbc4d68 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -303,6 +303,7 @@ struct auxtrace_record { const char *str); u64 (*reference)(struct auxtrace_record *itr); int (*read_finish)(struct auxtrace_record *itr, int idx); + unsigned int alignment; }; #ifdef HAVE_AUXTRACE_SUPPORT diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 85b523885f9d..2babddaa2481 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,11 +7,15 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) int __weak sched_getcpu(void) { errno = ENOSYS; return -1; } +#endif +#endif static int perf_flag_probe(void) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c4e55b71010c..3667e2123e5b 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -5,6 +5,7 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include "asm/bug.h" static struct cpu_map *cpu_map__default_new(void) { @@ -22,6 +23,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -35,6 +37,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -194,14 +197,32 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; + atomic_set(&cpus->refcnt, 1); } return cpus; } -void cpu_map__delete(struct cpu_map *map) +static void cpu_map__delete(struct cpu_map *map) { - free(map); + if (map) { + WARN_ONCE(atomic_read(&map->refcnt) != 0, + "cpu_map refcnt unbalanced\n"); + free(map); + } +} + +struct cpu_map *cpu_map__get(struct cpu_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void cpu_map__put(struct cpu_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + cpu_map__delete(map); } int cpu_map__get_socket(struct cpu_map *map, int idx) @@ -263,6 +284,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); + atomic_set(&cpus->refcnt, 1); *res = c; return 0; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 61a654849002..0af9cecb4c51 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,18 +3,19 @@ #include <stdio.h> #include <stdbool.h> +#include <linux/atomic.h> #include "perf.h" #include "util/debug.h" struct cpu_map { + atomic_t refcnt; int nr; int map[]; }; struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); -void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket(struct cpu_map *map, int idx); @@ -22,6 +23,9 @@ int cpu_map__get_core(struct cpu_map *map, int idx); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); +struct cpu_map *cpu_map__get(struct cpu_map *map); +void cpu_map__put(struct cpu_map *map); + static inline int cpu_map__socket(struct cpu_map *sock, int s) { if (!sock || s > sock->nr || s < 0) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index d7d986d8f23e..67a977e5d0ab 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -504,7 +504,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, fork_event, - threads->map[thread], 0, + thread_map__pid(threads, thread), 0, process, tool, machine, mmap_data, proc_map_timeout)) { err = -1; @@ -515,12 +515,12 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, * comm.pid is set to thread group id by * perf_event__synthesize_comm */ - if ((int) comm_event->comm.pid != threads->map[thread]) { + if ((int) comm_event->comm.pid != thread_map__pid(threads, thread)) { bool need_leader = true; /* is thread group leader in thread_map? */ for (j = 0; j < threads->nr; ++j) { - if ((int) comm_event->comm.pid == threads->map[j]) { + if ((int) comm_event->comm.pid == thread_map__pid(threads, j)) { need_leader = false; break; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8366511b45f8..6cfdee68e763 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -114,8 +114,8 @@ void perf_evlist__delete(struct perf_evlist *evlist) { perf_evlist__munmap(evlist); perf_evlist__close(evlist); - cpu_map__delete(evlist->cpus); - thread_map__delete(evlist->threads); + cpu_map__put(evlist->cpus); + thread_map__put(evlist->threads); evlist->cpus = NULL; evlist->threads = NULL; perf_evlist__purge(evlist); @@ -548,7 +548,7 @@ static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, else sid->cpu = -1; if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = evlist->threads->map[thread]; + sid->tid = thread_map__pid(evlist->threads, thread); else sid->tid = -1; } @@ -1101,6 +1101,31 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } +static int perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct target *target) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (evsel->cpus && target->cpu_list) + cpu_map__put(evsel->cpus); + + if (!evsel->cpus || target->cpu_list) + evsel->cpus = cpu_map__get(evlist->cpus); + + evsel->threads = thread_map__get(evlist->threads); + + if (!evsel->cpus || !evsel->threads) + return -ENOMEM; + } + + return 0; +} + int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { evlist->threads = thread_map__new_str(target->pid, target->tid, @@ -1117,10 +1142,10 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return 0; + return perf_evlist__propagate_maps(evlist, target); out_delete_threads: - thread_map__delete(evlist->threads); + thread_map__put(evlist->threads); evlist->threads = NULL; return -1; } @@ -1353,7 +1378,7 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) out: return err; out_free_cpus: - cpu_map__delete(evlist->cpus); + cpu_map__put(evlist->cpus); evlist->cpus = NULL; goto out; } @@ -1475,7 +1500,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar __func__, __LINE__); goto out_close_pipes; } - evlist->threads->map[0] = evlist->workload.pid; + thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); } close(child_ready_pipe[1]); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8489b9d2812..037633c1da9d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -289,5 +289,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 33449decf7bd..2936b3080722 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -885,6 +885,8 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); close_cgroup(evsel->cgrp); + cpu_map__put(evsel->cpus); + thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); perf_evsel__object.fini(evsel); @@ -896,7 +898,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -908,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = evsel->prev_raw_counts->cpu[cpu]; - evsel->prev_raw_counts->cpu[cpu] = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } count->val = count->val - tmp.val; @@ -937,20 +939,18 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb) +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { - struct perf_counts_values count; - - memset(&count, 0, sizeof(count)); + memset(count, 0, sizeof(*count)); if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) return -errno; - return cb(evsel, cpu, thread, &count); + return 0; } int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, @@ -962,15 +962,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, &count); + perf_evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); - evsel->counts->cpu[cpu] = count; + *perf_counts(evsel->counts, cpu, thread) = count; return 0; } @@ -1167,7 +1167,7 @@ retry_sample_id: int group_fd; if (!evsel->cgrp && !evsel->system_wide) - pid = threads->map[thread]; + pid = thread_map__pid(threads, thread); group_fd = get_group_fd(evsel, cpu, thread); retry_open: diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index bb0579e8a10a..4a7ed5656cf0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -8,23 +8,8 @@ #include <linux/types.h> #include "xyarray.h" #include "symbol.h" - -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; -}; - -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct perf_counts_values cpu[]; -}; +#include "cpumap.h" +#include "stat.h" struct perf_evsel; @@ -82,6 +67,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct thread_map *threads; unsigned int sample_size; int id_pos; int is_pos; @@ -113,10 +99,20 @@ struct thread_map; struct perf_evlist; struct record_opts; +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +{ + return evsel->cpus; +} + +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +{ + return perf_evsel__cpus(evsel)->nr; +} + void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, @@ -233,12 +229,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) -typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count); - -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb); +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 21a77e7a171e..03ace57a800c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1063,8 +1063,7 @@ out: free(buf); return events; error: - if (events) - free_event_desc(events); + free_event_desc(events); events = NULL; goto out; } diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h deleted file mode 100644 index f06d89f0b867..000000000000 --- a/tools/perf/util/include/linux/rbtree.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __TOOLS_LINUX_PERF_RBTREE_H -#define __TOOLS_LINUX_PERF_RBTREE_H -#include <stdbool.h> -#include "../../../../include/linux/rbtree.h" - -/* - * Handy for checking that we are not deleting an entry that is - * already in a list, found in block/{blk-throttle,cfq-iosched}.c, - * probably should be moved to lib/rbtree.c... - */ -static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); -} -#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h deleted file mode 100644 index 9d6fcdf1788b..000000000000 --- a/tools/perf/util/include/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stdbool.h> -#include "../../../../include/linux/rbtree_augmented.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 4744673aff1b..7ff682770fdb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1448,10 +1448,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, case PERF_RECORD_AUX: ret = machine__process_aux_event(machine, event); break; case PERF_RECORD_ITRACE_START: - ret = machine__process_itrace_start_event(machine, event); + ret = machine__process_itrace_start_event(machine, event); break; case PERF_RECORD_LOST_SAMPLES: ret = machine__process_lost_samples_event(machine, event, sample); break; - break; default: ret = -1; break; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2a4d1ec02846..09f8d2357108 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -17,6 +17,7 @@ #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" +#include "cpumap.h" #include "asm/bug.h" #define MAX_NAME_LEN 100 @@ -285,7 +286,9 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - evsel->cpus = cpus; + if (cpus) + evsel->cpus = cpu_map__get(cpus); + if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 09e738fe9ea2..13cef3c65565 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -119,8 +119,8 @@ event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?][a-zA-Z0-9_*?]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* +name [a-zA-Z_*?][a-zA-Z0-9_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpGHSDI]+ modifier_bp [rwx]{1,3} @@ -165,7 +165,6 @@ modifier_bp [rwx]{1,3} return PE_EVENT_NAME; } -. | <<EOF>> { BEGIN(INITIAL); REWIND(0); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 0fcc624eb767..7bcb8c315615 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,4 +1,5 @@ #include <linux/list.h> +#include <linux/compiler.h> #include <sys/types.h> #include <unistd.h> #include <stdio.h> @@ -205,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, + char *desc __maybe_unused, char *val) { struct perf_pmu_alias *alias; - char buf[256]; int ret; - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - buf[ret] = 0; - alias = malloc(sizeof(*alias)); if (!alias) return -ENOMEM; @@ -225,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI alias->unit[0] = '\0'; alias->per_pkg = false; - ret = parse_events_terms(&alias->terms, buf); + ret = parse_events_terms(&alias->terms, val); if (ret) { + pr_err("Cannot parse alias %s: %d\n", val, ret); free(alias); return ret; } alias->name = strdup(name); - /* - * load unit name and scale if available - */ - perf_pmu__parse_unit(alias, dir, name); - perf_pmu__parse_scale(alias, dir, name); - perf_pmu__parse_per_pkg(alias, dir, name); - perf_pmu__parse_snapshot(alias, dir, name); + if (dir) { + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); + } list_add_tail(&alias->list, list); return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +{ + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + + buf[ret] = 0; + + return __perf_pmu__new_alias(list, dir, name, NULL, buf); +} + static inline bool pmu_alias_info_file(char *name) { size_t len; @@ -436,7 +449,7 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } -struct perf_event_attr *__attribute__((weak)) +struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { return NULL; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 076527b639bd..381f23a443c7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -249,8 +249,12 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) static bool kprobe_blacklist__listed(unsigned long address); static bool kprobe_warn_out_range(const char *symbol, unsigned long address) { + u64 etext_addr; + /* Get the address of _etext for checking non-probable text symbol */ - if (kernel_get_symbol_address_by_name("_etext", false) < address) + etext_addr = kernel_get_symbol_address_by_name("_etext", false); + + if (etext_addr != 0 && etext_addr < address) pr_warning("%s is out of .text, skip it.\n", symbol); else if (kprobe_blacklist__listed(address)) pr_warning("%s is blacklisted function, skip it.\n", symbol); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 5925fec90562..e23ded40c79e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -20,3 +20,4 @@ util/stat.c util/strlist.c util/trace-event.c ../../lib/rbtree.c +util/string.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index d906d0ad5d40..626422eda727 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -384,7 +384,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { - cpu_map__delete(pcpus->cpus); + cpu_map__put(pcpus->cpus); pcpus->ob_type->tp_free((PyObject*)pcpus); } @@ -453,7 +453,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { - thread_map__delete(pthreads->threads); + thread_map__put(pthreads->threads); pthreads->ob_type->tp_free((PyObject*)pthreads); } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index d457c523a33d..1f7becbe5e18 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) if (!cpus) return false; cpu = cpus->map[0]; - cpu_map__delete(cpus); + cpu_map__put(cpus); do { ret = perf_do_probe_api(fn, cpu, try[i++]); @@ -226,7 +226,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str) struct cpu_map *cpus = cpu_map__new(NULL); cpu = cpus ? cpus->map[0] : 0; - cpu_map__delete(cpus); + cpu_map__put(cpus); } else { cpu = evlist->cpus->map[0]; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index aa482c10469d..ed9dc2555ec7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -686,6 +686,8 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe) { + if (dump_trace) + fprintf(stdout, "\n"); return ordered_events__flush(oe, OE_FLUSH__ROUND); } @@ -1726,7 +1728,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)"; - ret = fprintf(fp, "Aggregated stats:%s\n", msg); + ret = fprintf(fp, "\nAggregated stats:%s\n", msg); ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; @@ -1893,7 +1895,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, err = 0; out_delete_map: - cpu_map__delete(map); + cpu_map__put(map); return err; } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4014b709f956..f2a0d1521e26 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,6 +1,8 @@ #include <math.h> #include "stat.h" +#include "evlist.h" #include "evsel.h" +#include "thread_map.h" void update_stats(struct stats *stats, u64 val) { @@ -95,33 +97,46 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -struct perf_counts *perf_counts__new(int ncpus) +struct perf_counts *perf_counts__new(int ncpus, int nthreads) { - int size = sizeof(struct perf_counts) + - ncpus * sizeof(struct perf_counts_values); + struct perf_counts *counts = zalloc(sizeof(*counts)); - return zalloc(size); + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; } void perf_counts__delete(struct perf_counts *counts) { - free(counts); + if (counts) { + xyarray__delete(counts->values); + free(counts); + } } -static void perf_counts__reset(struct perf_counts *counts, int ncpus) +static void perf_counts__reset(struct perf_counts *counts) { - memset(counts, 0, (sizeof(*counts) + - (ncpus * sizeof(struct perf_counts_values)))); + xyarray__reset(counts->values); } -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus) +void perf_evsel__reset_counts(struct perf_evsel *evsel) { - perf_counts__reset(evsel->counts, ncpus); + perf_counts__reset(evsel->counts); } -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) { - evsel->counts = perf_counts__new(ncpus); + evsel->counts = perf_counts__new(ncpus, nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } @@ -130,3 +145,96 @@ void perf_evsel__free_counts(struct perf_evsel *evsel) perf_counts__delete(evsel->counts); evsel->counts = NULL; } + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +{ + int i; + struct perf_stat *ps = evsel->priv; + + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + + perf_stat_evsel_id_init(evsel); +} + +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +{ + evsel->priv = zalloc(sizeof(struct perf_stat)); + if (evsel->priv == NULL) + return -ENOMEM; + perf_evsel__reset_stat_priv(evsel); + return 0; +} + +void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +{ + zfree(&evsel->priv); +} + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) +{ + struct perf_counts *counts; + + counts = perf_counts__new(ncpus, nthreads); + if (counts) + evsel->prev_raw_counts = counts; + + return counts ? 0 : -ENOMEM; +} + +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->prev_raw_counts); + evsel->prev_raw_counts = NULL; +} + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +{ + int ncpus = perf_evsel__nr_cpus(evsel); + int nthreads = thread_map__nr(evsel->threads); + + if (perf_evsel__alloc_stat_priv(evsel) < 0 || + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + return -ENOMEM; + + return 0; +} + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (perf_evsel__alloc_stats(evsel, alloc_raw)) + goto out_free; + } + + return 0; + +out_free: + perf_evlist__free_stats(evlist); + return -1; +} + +void perf_evlist__free_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__free_stat_priv(evsel); + perf_evsel__free_counts(evsel); + perf_evsel__free_prev_raw_counts(evsel); + } +} + +void perf_evlist__reset_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__reset_stat_priv(evsel); + perf_evsel__reset_counts(evsel); + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 093dc3cb28dd..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <stdio.h> +#include "xyarray.h" struct stats { @@ -29,8 +30,32 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, + AGGR_THREAD, }; +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -46,6 +71,8 @@ static inline void init_stats(struct stats *stats) } struct perf_evsel; +struct perf_evlist; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -62,10 +89,24 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -struct perf_counts *perf_counts__new(int ncpus); +struct perf_counts *perf_counts__new(int ncpus, int nthreads); void perf_counts__delete(struct perf_counts *counts); -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_counts(struct perf_evsel *evsel); + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); +void perf_evsel__free_stat_priv(struct perf_evsel *evsel); + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads); +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); +void perf_evlist__free_stats(struct perf_evlist *evlist); +void perf_evlist__reset_stats(struct perf_evlist *evlist); #endif diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 283d3e73e2f2..eec6c1149f44 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -748,7 +748,7 @@ static int str_to_bitmap(char *s, cpumask_t *b) set_bit(c, cpumask_bits(b)); } - cpu_map__delete(m); + cpu_map__put(m); return ret; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 504f2d73b7ee..48b588c6951a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1132,8 +1132,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); - if (fd < 0) + if (fd < 0) { + pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; + } /* Read new maps into temporary lists */ err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index f4822bd03709..da7646d767fe 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -8,8 +8,11 @@ #include <unistd.h> #include "strlist.h" #include <string.h> +#include <api/fs/fs.h> +#include "asm/bug.h" #include "thread_map.h" #include "util.h" +#include "debug.h" /* Skip "." and ".." directories */ static int filter(const struct dirent *dir) @@ -20,11 +23,26 @@ static int filter(const struct dirent *dir) return 1; } +static void thread_map__reset(struct thread_map *map, int start, int nr) +{ + size_t size = (nr - start) * sizeof(map->map[0]); + + memset(&map->map[start], 0, size); +} + static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) { - size_t size = sizeof(*map) + sizeof(pid_t) * nr; + size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; + int start = map ? map->nr : 0; + + map = realloc(map, size); + /* + * We only realloc to add more items, let's reset new items. + */ + if (map) + thread_map__reset(map, start, nr); - return realloc(map, size); + return map; } #define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr) @@ -45,8 +63,9 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) threads = thread_map__alloc(items); if (threads != NULL) { for (i = 0; i < items; i++) - threads->map[i] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; + atomic_set(&threads->refcnt, 1); } for (i=0; i<items; i++) @@ -61,8 +80,9 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = tid; - threads->nr = 1; + thread_map__set_pid(threads, 0, tid); + threads->nr = 1; + atomic_set(&threads->refcnt, 1); } return threads; @@ -84,6 +104,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; + atomic_set(&threads->refcnt, 1); while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -123,8 +144,10 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) threads = tmp; } - for (i = 0; i < items; i++) - threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + for (i = 0; i < items; i++) { + thread_map__set_pid(threads, threads->nr + i, + atoi(namelist[i]->d_name)); + } for (i = 0; i < items; i++) zfree(&namelist[i]); @@ -201,7 +224,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) threads = nt; for (i = 0; i < items; i++) { - threads->map[j++] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name)); zfree(&namelist[i]); } threads->nr = total_tasks; @@ -210,6 +233,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -227,8 +252,9 @@ struct thread_map *thread_map__new_dummy(void) struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = -1; - threads->nr = 1; + thread_map__set_pid(threads, 0, -1); + threads->nr = 1; + atomic_set(&threads->refcnt, 1); } return threads; } @@ -267,10 +293,12 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) goto out_free_threads; threads = nt; - threads->map[ntasks - 1] = tid; - threads->nr = ntasks; + thread_map__set_pid(threads, ntasks - 1, tid); + threads->nr = ntasks; } out: + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -290,9 +318,30 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, return thread_map__new_by_tid_str(tid); } -void thread_map__delete(struct thread_map *threads) +static void thread_map__delete(struct thread_map *threads) { - free(threads); + if (threads) { + int i; + + WARN_ONCE(atomic_read(&threads->refcnt) != 0, + "thread map refcnt unbalanced\n"); + for (i = 0; i < threads->nr; i++) + free(thread_map__comm(threads, i)); + free(threads); + } +} + +struct thread_map *thread_map__get(struct thread_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void thread_map__put(struct thread_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + thread_map__delete(map); } size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) @@ -301,7 +350,60 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) size_t printed = fprintf(fp, "%d thread%s: ", threads->nr, threads->nr > 1 ? "s" : ""); for (i = 0; i < threads->nr; ++i) - printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + printed += fprintf(fp, "%s%d", i ? ", " : "", thread_map__pid(threads, i)); return printed + fprintf(fp, "\n"); } + +static int get_comm(char **comm, pid_t pid) +{ + char *path; + size_t size; + int err; + + if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1) + return -ENOMEM; + + err = filename__read_str(path, comm, &size); + if (!err) { + /* + * We're reading 16 bytes, while filename__read_str + * allocates data per BUFSIZ bytes, so we can safely + * mark the end of the string. + */ + (*comm)[size] = 0; + rtrim(*comm); + } + + free(path); + return err; +} + +static void comm_init(struct thread_map *map, int i) +{ + pid_t pid = thread_map__pid(map, i); + char *comm = NULL; + + /* dummy pid comm initialization */ + if (pid == -1) { + map->map[i].comm = strdup("dummy"); + return; + } + + /* + * The comm name is like extra bonus ;-), + * so just warn if we fail for any reason. + */ + if (get_comm(&comm, pid)) + pr_warning("Couldn't resolve comm name for pid %d\n", pid); + + map->map[i].comm = comm; +} + +void thread_map__read_comms(struct thread_map *threads) +{ + int i; + + for (i = 0; i < threads->nr; ++i) + comm_init(threads, i); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 95313f43cc0f..af679d8a50f8 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,10 +3,17 @@ #include <sys/types.h> #include <stdio.h> +#include <linux/atomic.h> + +struct thread_map_data { + pid_t pid; + char *comm; +}; struct thread_map { + atomic_t refcnt; int nr; - pid_t map[]; + struct thread_map_data map[]; }; struct thread_map *thread_map__new_dummy(void); @@ -15,11 +22,12 @@ struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct thread_map *thread_map__get(struct thread_map *map); +void thread_map__put(struct thread_map *map); + struct thread_map *thread_map__new_str(const char *pid, const char *tid, uid_t uid); -void thread_map__delete(struct thread_map *threads); - size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); static inline int thread_map__nr(struct thread_map *threads) @@ -27,4 +35,21 @@ static inline int thread_map__nr(struct thread_map *threads) return threads ? threads->nr : 1; } +static inline pid_t thread_map__pid(struct thread_map *map, int thread) +{ + return map->map[thread].pid; +} + +static inline void +thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) +{ + map->map[thread].pid = pid; +} + +static inline char *thread_map__comm(struct thread_map *map, int thread) +{ + return map->map[thread].comm; +} + +void thread_map__read_comms(struct thread_map *threads); #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 5da129e10aa2..326e826a5d20 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts) argv[acpi_gbl_optind][0] != '-' || argv[acpi_gbl_optind][1] == '\0') { return (ACPI_OPT_END); - } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) { + } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) { acpi_gbl_optind++; return (ACPI_OPT_END); } @@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts) /* Make sure that the option is legal */ if (current_char == ':' || - (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) { + (opts_ptr = strchr(opts, current_char)) == NULL) { ACPI_OPTION_ERROR("Illegal option: -", current_char); if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') { diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8 index 38f095d86b52..79e2d1d435d1 100644 --- a/tools/power/acpi/man/acpidump.8 +++ b/tools/power/acpi/man/acpidump.8 @@ -22,9 +22,6 @@ acpidump options are as follow: .B \-b Dump tables to binary files .TP -.B \-c -Dump customized tables -.TP .B \-h \-? This help message .TP @@ -48,15 +45,25 @@ Verbose mode .B \-a <Address> Get table via a physical address .TP +.B \-c <on|off> +Turning on/off customized table dumping +.TP .B \-f <BinaryFile> Get table via a binary file .TP .B \-n <Signature> Get table via a name/signature .TP -Invocation without parameters dumps all available tables +.B \-x +Do not use but dump XSDT +.TP +.B \-x \-x +Do not use or dump XSDT +.TP +.fi +Invocation without parameters dumps all available tables. .TP -Multiple mixed instances of -a, -f, and -n are supported +Multiple mixed instances of -a, -f, and -n are supported. .SH EXAMPLES diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index db15c9d2049e..dd5008b0617a 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); exit: osl_unmap_table(mapped_table); @@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void) gbl_rsdp_address = rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address); - ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); + memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); acpi_os_unmap_memory(rsdp_address, rsdp_size); return (AE_OK); @@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void) return (AE_OK); } - /* Get RSDP from memory */ + if (!gbl_dump_customized_tables) { - status = osl_load_rsdp(); - if (ACPI_FAILURE(status)) { - return (status); - } + /* Get RSDP from memory */ + + status = osl_load_rsdp(); + if (ACPI_FAILURE(status)) { + return (status); + } - /* Get XSDT from memory */ + /* Get XSDT from memory */ - if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { - if (gbl_xsdt) { - free(gbl_xsdt); - gbl_xsdt = NULL; + if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { + if (gbl_xsdt) { + free(gbl_xsdt); + gbl_xsdt = NULL; + } + + gbl_revision = 2; + status = osl_get_bios_table(ACPI_SIG_XSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_xsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - gbl_revision = 2; - status = osl_get_bios_table(ACPI_SIG_XSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_xsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); + /* Get RSDT from memory */ + + if (gbl_rsdp.rsdt_physical_address) { + if (gbl_rsdt) { + free(gbl_rsdt); + gbl_rsdt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_rsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - } - /* Get RSDT from memory */ + /* Get FADT from memory */ - if (gbl_rsdp.rsdt_physical_address) { - if (gbl_rsdt) { - free(gbl_rsdt); - gbl_rsdt = NULL; + if (gbl_fadt) { + free(gbl_fadt); + gbl_fadt = NULL; } - status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + status = osl_get_bios_table(ACPI_SIG_FADT, 0, ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_rsdt), &address); + &gbl_fadt), + &gbl_fadt_address); if (ACPI_FAILURE(status)) { return (status); } - } - - /* Get FADT from memory */ - - if (gbl_fadt) { - free(gbl_fadt); - gbl_fadt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_FADT, 0, - ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_fadt), - &gbl_fadt_address); - if (ACPI_FAILURE(status)) { - return (status); - } - - if (!gbl_dump_customized_tables) { /* Add mandatory tables to global table list first */ @@ -961,7 +964,7 @@ osl_get_bios_table(char *signature, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); *address = table_address; *table = local_table; diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 0b1fa290245a..44ad4889d468 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif -#ifdef _free_BSD +#if defined(_dragon_fly) || defined(_free_BSD) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 84bdef0136cb..eed534481434 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -66,7 +66,7 @@ EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE); -EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE); +EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE); EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE); EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL); EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL); diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index c736adf5fb55..61d0de804b70 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature) acpi_status status; int table_status; - if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAME_SIZE) { acpi_log_error ("Invalid table signature [%s]: must be exactly 4 characters\n", signature); @@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature) /* Table signatures are expected to be uppercase */ - ACPI_STRCPY(local_signature, signature); + strcpy(local_signature, signature); acpi_ut_strupr(local_signature); /* To be friendly, handle tables whose signatures do not match the name */ if (ACPI_COMPARE_NAME(local_signature, "FADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_FADT); + strcpy(local_signature, ACPI_SIG_FADT); } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_MADT); + strcpy(local_signature, ACPI_SIG_MADT); } /* Dump all instances of this signature (to handle multiple SSDTs) */ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8f2fe168228e..a37f9702b2a9 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) } else { ACPI_MOVE_NAME(filename, table->signature); } - filename[0] = (char)ACPI_TOLOWER(filename[0]); - filename[1] = (char)ACPI_TOLOWER(filename[1]); - filename[2] = (char)ACPI_TOLOWER(filename[2]); - filename[3] = (char)ACPI_TOLOWER(filename[3]); + filename[0] = (char)tolower((int)filename[0]); + filename[1] = (char)tolower((int)filename[1]); + filename[2] = (char)tolower((int)filename[2]); + filename[3] = (char)tolower((int)filename[3]); filename[ACPI_NAME_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ @@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) if (instance > 0) { acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u", instance); - ACPI_STRCAT(filename, instance_str); + strcat(filename, instance_str); } - ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX); + strcat(filename, ACPI_TABLE_FILE_SUFFIX); if (gbl_verbose_mode) { acpi_log_error diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index d0ba6535f5af..57620f66ae6c 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS "?a:bcf:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" /****************************************************************************** * @@ -96,7 +96,6 @@ static void ap_display_usage(void) ACPI_USAGE_HEADER("acpidump [options]"); ACPI_OPTION("-b", "Dump tables to binary files"); - ACPI_OPTION("-c", "Dump customized tables"); ACPI_OPTION("-h -?", "This help message"); ACPI_OPTION("-o <File>", "Redirect output to file"); ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP"); @@ -107,6 +106,7 @@ static void ap_display_usage(void) ACPI_USAGE_TEXT("\nTable Options:\n"); ACPI_OPTION("-a <Address>", "Get table via a physical address"); + ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping"); ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file"); ACPI_OPTION("-n <Signature>", "Get table via a name/signature"); ACPI_OPTION("-x", "Do not use but dump XSDT"); @@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv) case 'c': /* Dump customized tables */ - gbl_dump_customized_tables = TRUE; + if (!strcmp(acpi_gbl_optarg, "on")) { + gbl_dump_customized_tables = TRUE; + } else if (!strcmp(acpi_gbl_optarg, "off")) { + gbl_dump_customized_tables = FALSE; + } else { + acpi_log_error + ("%s: Cannot handle this switch, please use on|off\n", + acpi_gbl_optarg); + return (-1); + } continue; case 'h': diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e9b64520ec1..f56914c7929b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,3 +1,6 @@ +ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c85a6f6ba559..64bfaa50831c 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -65,6 +65,21 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, return fallback_fn(offset, size); } +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_cache); @@ -77,6 +92,18 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_nocache); +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4b69b8368de0..d0bdae40ccc9 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -128,6 +128,8 @@ struct nfit_test { int num_pm; void **dimm; dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; void **label; dma_addr_t *label_dma; void **spa_set; @@ -155,7 +157,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, int i, rc; if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENXIO; + return -ENOTTY; /* lookup label space for the given dimm */ for (i = 0; i < ARRAY_SIZE(handle); i++) @@ -331,7 +333,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR - + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -356,6 +359,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->label[i]) return -ENOMEM; sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; } for (i = 0; i < NUM_DCR; i++) { @@ -408,6 +415,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; unsigned int offset; nfit_test_init_header(nfit_buf, size); @@ -831,6 +839,39 @@ static void nfit_test0_setup(struct nfit_test *t) bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -933,6 +974,10 @@ static int nfit_test_probe(struct platform_device *pdev) GFP_KERNEL); nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), GFP_KERNEL); nfit_test->label_dma = devm_kcalloc(dev, num, @@ -943,7 +988,8 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(dma_addr_t), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr - && nfit_test->dcr_dma) + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) /* pass */; else return -ENOMEM; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 848af90b8091..8b8a44453670 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -553,6 +553,8 @@ static struct kvm *kvm_create_vm(unsigned long type) list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); + preempt_notifier_inc(); + return kvm; out_err: @@ -620,6 +622,7 @@ static void kvm_destroy_vm(struct kvm *kvm) cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); + preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); } |