diff options
924 files changed, 32580 insertions, 12073 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 2a3330696372..8af4ad121828 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -490,6 +490,8 @@ pgpgin - # of charging events to the memory cgroup. The charging pgpgout - # of uncharging events to the memory cgroup. The uncharging event happens each time a page is unaccounted from the cgroup. swap - # of bytes of swap usage +writeback - # of bytes of file/anon cache that are queued for syncing to + disk. inactive_anon - # of bytes of anonymous and swap cache memory on inactive LRU list. active_anon - # of bytes of anonymous and swap cache memory on active diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt index d5176882d8b9..a61727f9a6d1 100644 --- a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt +++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt @@ -1,7 +1,7 @@ Binding for TI/National Semiconductor LP55xx Led Drivers Required properties: -- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" +- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" or "ti,lp8501" - reg: I2C slave address - clock-mode: Input clock mode, (0: automode, 1: internal, 2: external) @@ -11,6 +11,11 @@ Each child has own specific current settings Optional properties: - label: Used for naming LEDs +- pwr-sel: LP8501 specific property. Power selection for output channels. + 0: D1~9 are connected to VDD + 1: D1~6 with VDD, D7~9 with VOUT + 2: D1~6 with VOUT, D7~9 with VDD + 3: D1~9 are connected to VOUT Alternatively, each child can have specific channel name - chan-name: Name of each channel name @@ -145,3 +150,68 @@ lp5562@30 { max-cur = /bits/ 8 <0x60>; }; }; + +example 4) LP8501 +9 channels are defined. The 'pwr-sel' is LP8501 specific property. +Others are same as LP5523. 
+ +lp8501@32 { + compatible = "ti,lp8501"; + reg = <0x32>; + clock-mode = /bits/ 8 <2>; + pwr-sel = /bits/ 8 <3>; /* D1~9 connected to VOUT */ + + chan0 { + chan-name = "d1"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan1 { + chan-name = "d2"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan2 { + chan-name = "d3"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan3 { + chan-name = "d4"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan4 { + chan-name = "d5"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan5 { + chan-name = "d6"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan6 { + chan-name = "d7"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan7 { + chan-name = "d8"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; + + chan8 { + chan-name = "d9"; + led-cur = /bits/ 8 <0x14>; + max-cur = /bits/ 8 <0x20>; + }; +}; diff --git a/Documentation/devicetree/bindings/leds/pca963x.txt b/Documentation/devicetree/bindings/leds/pca963x.txt new file mode 100644 index 000000000000..aece3eac1b63 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/pca963x.txt @@ -0,0 +1,47 @@ +LEDs connected to pca9632, pca9633 or pca9634 + +Required properties: +- compatible : should be : "nxp,pca9632", "nxp,pca9633" or "nxp,pca9634" + +Optional properties: +- nxp,totem-pole : use totem pole (push-pull) instead of default open-drain +- nxp,hw-blink : use hardware blinking instead of software blinking + +Each led is represented as a sub-node of the nxp,pca963x device. 
+ +LED sub-node properties: +- label : (optional) see Documentation/devicetree/bindings/leds/common.txt +- reg : number of LED line (could be from 0 to 3 in pca9632 or pca9633 + or 0 to 7 in pca9634) +- linux,default-trigger : (optional) + see Documentation/devicetree/bindings/leds/common.txt + +Examples: + +pca9632: pca9632 { + compatible = "nxp,pca9632"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x62>; + + red@0 { + label = "red"; + reg = <0>; + linux,default-trigger = "none"; + }; + green@1 { + label = "green"; + reg = <1>; + linux,default-trigger = "none"; + }; + blue@2 { + label = "blue"; + reg = <2>; + linux,default-trigger = "none"; + }; + unused@3 { + label = "unused"; + reg = <3>; + linux,default-trigger = "none"; + }; +}; diff --git a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt new file mode 100644 index 000000000000..8e0a1eb0acbb --- /dev/null +++ b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt @@ -0,0 +1,190 @@ +* Mediatek/Ralink RT3883 PCI controller + +1) Main node + + Required properties: + + - compatible: must be "ralink,rt3883-pci" + + - reg: specifies the physical base address of the controller and + the length of the memory mapped region. + + - #address-cells: specifies the number of cells needed to encode an + address. The value must be 1. + + - #size-cells: specifies the number of cells used to represent the size + of an address. The value must be 1. + + - ranges: specifies the translation between child address space and parent + address space + + Optional properties: + + - status: indicates the operational status of the device. + Value must be either "disabled" or "okay". + +2) Child nodes + + The main node must have two child nodes which describes the built-in + interrupt controller and the PCI host bridge. 
+ + a) Interrupt controller: + + Required properties: + + - interrupt-controller: identifies the node as an interrupt controller + + - #address-cells: specifies the number of cells needed to encode an + address. The value must be 0. As such, 'interrupt-map' nodes do not + have to specify a parent unit address. + + - #interrupt-cells: specifies the number of cells needed to encode an + interrupt source. The value must be 1. + + - interrupt-parent: the phandle for the interrupt controller that + services interrupts for this device. + + - interrupts: specifies the interrupt source of the parent interrupt + controller. The format of the interrupt specifier depends on the + parent interrupt controller. + + b) PCI host bridge: + + Required properties: + + - #address-cells: specifies the number of cells needed to encode an + address. The value must be 3. + + - #size-cells: specifies the number of cells used to represent the size + of an address. The value must be 2. + + - #interrupt-cells: specifies the number of cells needed to encode an + interrupt source. The value must be 1. + + - device_type: must be "pci" + + - bus-range: PCI bus numbers covered + + - ranges: specifies the ranges for the PCI memory and I/O regions + + - interrupt-map-mask, + - interrupt-map: standard PCI properties to define the mapping of the + PCI interface to interrupt numbers. + + The PCI host bridge node might have additional sub-nodes representing + the onboard PCI devices/PCI slots. Each such sub-node must have the + following mandatory properties: + + - reg: used only for interrupt mapping, so only the first four bytes + are used to refer to the correct bus number and device number. 
+ + - device_type: must be "pci" + + If a given sub-node represents a PCI bridge it must have the following + mandatory properties as well: + + - #address-cells: must be set to <3> + + - #size-cells: must be set to <2> + + - #interrupt-cells: must be set to <1> + + - interrupt-map-mask, + - interrupt-map: standard PCI properties to define the mapping of the + PCI interface to interrupt numbers. + + Besides the required properties the sub-nodes may have these optional + properties: + + - status: indicates the operational status of the sub-node. + Value must be either "disabled" or "okay". + +3) Example: + + a) SoC specific dtsi file: + + pci@10140000 { + compatible = "ralink,rt3883-pci"; + reg = <0x10140000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; + ranges; /* direct mapping */ + + status = "disabled"; + + pciintc: interrupt-controller { + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <4>; + }; + + host-bridge { + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + + device_type = "pci"; + + bus-range = <0 255>; + ranges = < + 0x02000000 0 0x00000000 0x20000000 0 0x10000000 /* pci memory */ + 0x01000000 0 0x00000000 0x10160000 0 0x00010000 /* io space */ + >; + + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 17 */ + 0x8800 0 0 1 &pciintc 18 + 0x8800 0 0 2 &pciintc 18 + 0x8800 0 0 3 &pciintc 18 + 0x8800 0 0 4 &pciintc 18 + /* IDSEL 18 */ + 0x9000 0 0 1 &pciintc 19 + 0x9000 0 0 2 &pciintc 19 + 0x9000 0 0 3 &pciintc 19 + 0x9000 0 0 4 &pciintc 19 + >; + + pci-bridge@1 { + reg = <0x0800 0 0 0 0>; + device_type = "pci"; + #interrupt-cells = <1>; + #address-cells = <3>; + #size-cells = <2>; + + interrupt-map-mask = <0x0 0 0 0>; + interrupt-map = <0x0 0 0 0 &pciintc 20>; + + status = "disabled"; + }; + + pci-slot@17 { + reg = <0x8800 0 0 0 0>; + device_type = "pci"; + + status = "disabled"; + }; + + pci-slot@18 { + reg = <0x9000 0 0 0 0>; + device_type 
= "pci"; + + status = "disabled"; + }; + }; + }; + + b) Board specific dts file: + + pci@10140000 { + status = "okay"; + + host-bridge { + pci-bridge@1 { + status = "okay"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/mvebu-audio.txt b/Documentation/devicetree/bindings/sound/mvebu-audio.txt index 7e5fd37c1b3f..f0062c5871b4 100644 --- a/Documentation/devicetree/bindings/sound/mvebu-audio.txt +++ b/Documentation/devicetree/bindings/sound/mvebu-audio.txt @@ -2,13 +2,17 @@ Required properties: -- compatible: "marvell,mvebu-audio" +- compatible: + "marvell,kirkwood-audio" for Kirkwood platforms + "marvell,dove-audio" for Dove platforms - reg: physical base address of the controller and length of memory mapped region. -- interrupts: list of two irq numbers. - The first irq is used for data flow and the second one is used for errors. +- interrupts: + with "marvell,kirkwood-audio", the audio interrupt + with "marvell,dove-audio", a list of two interrupts, the first for + the data flow, and the second for errors. - clocks: one or two phandles. The first one is mandatory and defines the internal clock. @@ -21,7 +25,7 @@ Required properties: Example: i2s1: audio-controller@b4000 { - compatible = "marvell,mvebu-audio"; + compatible = "marvell,dove-audio"; reg = <0xb4000 0x2210>; interrupts = <21>, <22>; clocks = <&gate_clk 13>; diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt new file mode 100644 index 000000000000..284f5300fd8b --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt @@ -0,0 +1,55 @@ +* Exynos Thermal Management Unit (TMU) + +** Required properties: + +- compatible : One of the following: + "samsung,exynos4412-tmu" + "samsung,exynos4210-tmu" + "samsung,exynos5250-tmu" + "samsung,exynos5440-tmu" +- interrupt-parent : The phandle for the interrupt controller +- reg : Address range of the thermal registers. 
For SoCs which have multiple + instances of TMU and some registers shared across all TMUs (e.g. + interrupt related), 2 sets of registers have to be supplied. First set + belongs to each instance of TMU and second set belongs to common TMU + registers. +- interrupts : Should contain interrupt for thermal system +- clocks : The main clock for TMU device +- clock-names : Thermal system clock name +- vtmu-supply: This entry is optional and provides the regulator node supplying + voltage to TMU. If needed this entry can be placed inside + board/platform specific dts file. + +Example 1): + + tmu@100C0000 { + compatible = "samsung,exynos4412-tmu"; + interrupt-parent = <&combiner>; + reg = <0x100C0000 0x100>; + interrupts = <2 4>; + clocks = <&clock 383>; + clock-names = "tmu_apbif"; + status = "disabled"; + vtmu-supply = <&tmu_regulator_node>; + }; + +Example 2): + + tmuctrl_0: tmuctrl@160118 { + compatible = "samsung,exynos5440-tmu"; + reg = <0x160118 0x230>, <0x160368 0x10>; + interrupts = <0 58 0>; + clocks = <&clock 21>; + clock-names = "tmu_apbif"; + }; + +Note: For multi-instance tmu each instance should have an alias correctly +numbered in "aliases" node. + +Example: + +aliases { + tmuctrl0 = &tmuctrl_0; + tmuctrl1 = &tmuctrl_1; + tmuctrl2 = &tmuctrl_2; +}; diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt new file mode 100644 index 000000000000..541c25e49abf --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt @@ -0,0 +1,17 @@ +* Temperature Monitor (TEMPMON) on Freescale i.MX SoCs + +Required properties: +- compatible : "fsl,imx6q-tempmon" +- fsl,tempmon : phandle pointer to system controller that contains TEMPMON + control registers, e.g. ANATOP on imx6q. +- fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON + calibration data, e.g. OCOTP on imx6q. The details about calibration data + can be found in SoC Reference Manual. 
+ +Example: + +tempmon { + compatible = "fsl,imx6q-tempmon"; + fsl,tempmon = <&anatop>; + fsl,tempmon-data = <&ocotp>; +}; diff --git a/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt index 36381129d141..f455182b1086 100644 --- a/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt +++ b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt @@ -2,14 +2,40 @@ Marvell Armada 370 and Armada XP Timers --------------------------------------- Required properties: -- compatible: Should be "marvell,armada-370-xp-timer" +- compatible: Should be either "marvell,armada-370-timer" or + "marvell,armada-xp-timer" as appropriate. - interrupts: Should contain the list of Global Timer interrupts and then local timer interrupts - reg: Should contain location and length for timers register. First pair for the Global Timer registers, second pair for the local/private timers. -- clocks: clock driving the timer hardware -Optional properties: -- marvell,timer-25Mhz: Tells whether the Global timer supports the 25 - Mhz fixed mode (available on Armada XP and not on Armada 370) +Clocks required for compatible = "marvell,armada-370-timer": +- clocks : Must contain a single entry describing the clock input + +Clocks required for compatible = "marvell,armada-xp-timer": +- clocks : Must contain an entry for each entry in clock-names. +- clock-names : Must include the following entries: + "nbclk" (L2/coherency fabric clock), + "fixed" (Reference 25 MHz fixed-clock). 
+ +Examples: + +- Armada 370: + + timer { + compatible = "marvell,armada-370-timer"; + reg = <0x20300 0x30>, <0x21040 0x30>; + interrupts = <37>, <38>, <39>, <40>, <5>, <6>; + clocks = <&coreclk 2>; + }; + +- Armada XP: + + timer { + compatible = "marvell,armada-xp-timer"; + reg = <0x20300 0x30>, <0x21040 0x30>; + interrupts = <37>, <38>, <39>, <40>, <5>, <6>; + clocks = <&coreclk 2>, <&refclk>; + clock-names = "nbclk", "fixed"; + }; diff --git a/Documentation/filesystems/cifs.txt b/Documentation/filesystems/cifs.txt deleted file mode 100644 index 49cc923a93e3..000000000000 --- a/Documentation/filesystems/cifs.txt +++ /dev/null @@ -1,51 +0,0 @@ - This is the client VFS module for the Common Internet File System - (CIFS) protocol which is the successor to the Server Message Block - (SMB) protocol, the native file sharing mechanism for most early - PC operating systems. CIFS is fully supported by current network - file servers such as Windows 2000, Windows 2003 (including - Windows XP) as well by Samba (which provides excellent CIFS - server support for Linux and many other operating systems), so - this network filesystem client can mount to a wide variety of - servers. The smbfs module should be used instead of this cifs module - for mounting to older SMB servers such as OS/2. The smbfs and cifs - modules can coexist and do not conflict. The CIFS VFS filesystem - module is designed to work well with servers that implement the - newer versions (dialects) of the SMB/CIFS protocol such as Samba, - the program written by Andrew Tridgell that turns any Unix host - into a SMB/CIFS file server. - - The intent of this module is to provide the most advanced network - file system function for CIFS compliant servers, including better - POSIX compliance, secure per-user session establishment, high - performance safe distributed caching (oplock), optional packet - signing, large files, Unicode support and other internationalization - improvements. 
Since both Samba server and this filesystem client support - the CIFS Unix extensions, the combination can provide a reasonable - alternative to NFSv4 for fileserving in some Linux to Linux environments, - not just in Linux to Windows environments. - - This filesystem has an optional mount utility (mount.cifs) that can - be obtained from the project page and installed in the path in the same - directory with the other mount helpers (such as mount.smbfs). - Mounting using the cifs filesystem without installing the mount helper - requires specifying the server's ip address. - - For Linux 2.4: - mount //anything/here /mnt_target -o - user=username,pass=password,unc=//ip_address_of_server/sharename - - For Linux 2.5: - mount //ip_address_of_server/sharename /mnt_target -o user=username, pass=password - - - For more information on the module see the project page at - - http://us1.samba.org/samba/Linux_CIFS_client.html - - For more information on CIFS see: - - http://www.snia.org/tech_activities/CIFS - - or the Samba site: - - http://www.samba.org diff --git a/fs/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS index ea940b1db77b..ca4a67a0bb1e 100644 --- a/fs/cifs/AUTHORS +++ b/Documentation/filesystems/cifs/AUTHORS @@ -39,6 +39,7 @@ Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanu Gunter Kukkukk (testing and suggestions for support of old servers) Igor Mammedov (DFS support) Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) +Scott Lovenberg Test case and Bug Report contributors ------------------------------------- diff --git a/fs/cifs/CHANGES b/Documentation/filesystems/cifs/CHANGES index bc0025cdd1c9..bc0025cdd1c9 100644 --- a/fs/cifs/CHANGES +++ b/Documentation/filesystems/cifs/CHANGES diff --git a/fs/cifs/README b/Documentation/filesystems/cifs/README index 2d5622f60e11..2d5622f60e11 100644 --- a/fs/cifs/README +++ b/Documentation/filesystems/cifs/README diff --git a/fs/cifs/TODO 
b/Documentation/filesystems/cifs/TODO index 355abcdcda98..355abcdcda98 100644 --- a/fs/cifs/TODO +++ b/Documentation/filesystems/cifs/TODO diff --git a/Documentation/filesystems/cifs/cifs.txt b/Documentation/filesystems/cifs/cifs.txt new file mode 100644 index 000000000000..2fac91ac96cf --- /dev/null +++ b/Documentation/filesystems/cifs/cifs.txt @@ -0,0 +1,31 @@ + This is the client VFS module for the Common Internet File System + (CIFS) protocol which is the successor to the Server Message Block + (SMB) protocol, the native file sharing mechanism for most early + PC operating systems. New and improved versions of CIFS are now + called SMB2 and SMB3. These dialects are also supported by the + CIFS VFS module. CIFS is fully supported by network + file servers such as Windows 2000, 2003, 2008 and 2012 + as well as by Samba (which provides excellent CIFS + server support for Linux and many other operating systems), so + this network filesystem client can mount to a wide variety of + servers. + + The intent of this module is to provide the most advanced network + file system function for CIFS compliant servers, including better + POSIX compliance, secure per-user session establishment, high + performance safe distributed caching (oplock), optional packet + signing, large files, Unicode support and other internationalization + improvements. Since both Samba server and this filesystem client support + the CIFS Unix extensions, the combination can provide a reasonable + alternative to NFSv4 for fileserving in some Linux to Linux environments, + not just in Linux to Windows environments. + + This filesystem has a mount utility (mount.cifs) that can be obtained from + + https://ftp.samba.org/pub/linux-cifs/cifs-utils/ + + It must be installed in the directory with the other mount helpers. 
+ + For more information on the module see the project wiki page at + + https://wiki.samba.org/index.php/LinuxCIFS_utils diff --git a/Documentation/filesystems/cifs/winucase_convert.pl b/Documentation/filesystems/cifs/winucase_convert.pl new file mode 100755 index 000000000000..322a9c833f23 --- /dev/null +++ b/Documentation/filesystems/cifs/winucase_convert.pl @@ -0,0 +1,62 @@ +#!/usr/bin/perl -w +# +# winucase_convert.pl -- convert "Windows 8 Upper Case Mapping Table.txt" to +# a two-level set of C arrays. +# +# Copyright 2013: Jeff Layton <jlayton@redhat.com> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +while(<>) { + next if (!/^0x(..)(..)\t0x(....)\t/); + $firstchar = hex($1); + $secondchar = hex($2); + $uppercase = hex($3); + + $top[$firstchar][$secondchar] = $uppercase; +} + +for ($i = 0; $i < 256; $i++) { + next if (!$top[$i]); + + printf("static const wchar_t t2_%2.2x[256] = {", $i); + for ($j = 0; $j < 256; $j++) { + if (($j % 8) == 0) { + print "\n\t"; + } else { + print " "; + } + printf("0x%4.4x,", $top[$i][$j] ? 
$top[$i][$j] : 0); + } + print "\n};\n\n"; +} + +printf("static const wchar_t *const toplevel[256] = {", $i); +for ($i = 0; $i < 256; $i++) { + if (($i % 8) == 0) { + print "\n\t"; + } elsif ($top[$i]) { + print " "; + } else { + print " "; + } + + if ($top[$i]) { + printf("t2_%2.2x,", $i); + } else { + print "NULL,"; + } +} +print "\n};\n\n"; diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 206a1bdc7321..f0890581f7f6 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -451,3 +451,7 @@ in your dentry operations instead. -- [mandatory] ->readdir() is gone now; switch to ->iterate() +[mandatory] + vfs_follow_link has been removed. Filesystems must use nd_set_link + from ->follow_link for normal symlinks, or nd_jump_link for magic + /proc/<pid> style links. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f93a88250a44..deb48b5fd883 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -359,11 +359,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); - int (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct file *, + unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); -} ____cacheline_aligned; - struct file *, unsigned open_flag, - umode_t create_mode, int *opened); }; Again, all methods are called without any locks being held, unless @@ -470,9 +468,11 @@ otherwise noted. method the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning 1 instead of - usual 0 or -ve . 
This method is only called if the last - component is negative or needs lookup. Cached positive dentries are - still handled by f_op->open(). + usual 0 or -ve . This method is only called if the last component is + negative or needs lookup. Cached positive dentries are still handled by + f_op->open(). If the file was created, the FILE_CREATED flag should be + set in "opened". In case of O_EXCL the method must only succeed if the + file didn't exist and hence FILE_CREATED shall always be set on success. tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to atomically creating, opening and unlinking a file in given directory. diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index 79e4c2e6e5e8..d08d8c179f85 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -18,7 +18,25 @@ All three channels can be also controlled using the engine micro programs. More details of the instructions can be found from the public data sheet. LP5521 has the internal program memory for running various LED patterns. -For the details, please refer to 'firmware' section in leds-lp55xx.txt +There are two ways to run LED patterns. + +1) Legacy interface - enginex_mode and enginex_load + Control interface for the engines: + x is 1 .. 3 + enginex_mode : disabled, load, run + enginex_load : store program (visible only in engine load mode) + + Example (start to blink the channel 2 led): + cd /sys/class/leds/lp5521:channel2/device + echo "load" > engine3_mode + echo "037f4d0003ff6000" > engine3_load + echo "run" > engine3_mode + + To stop the engine: + echo "disabled" > engine3_mode + +2) Firmware interface - LP55xx common interface + For the details, please refer to 'firmware' section in leds-lp55xx.txt sysfs contains a selftest entry. 
The test communicates with the chip and checks that diff --git a/Documentation/leds/leds-lp5523.txt b/Documentation/leds/leds-lp5523.txt index 899fdad509fe..5b3e91d4ac59 100644 --- a/Documentation/leds/leds-lp5523.txt +++ b/Documentation/leds/leds-lp5523.txt @@ -28,7 +28,26 @@ If both fields are NULL, 'lp5523' is used by default. /sys/class/leds/lp5523:channelN (N: 0 ~ 8) LP5523 has the internal program memory for running various LED patterns. -For the details, please refer to 'firmware' section in leds-lp55xx.txt +There are two ways to run LED patterns. + +1) Legacy interface - enginex_mode, enginex_load and enginex_leds + Control interface for the engines: + x is 1 .. 3 + enginex_mode : disabled, load, run + enginex_load : microcode load (visible only in load mode) + enginex_leds : led mux control (visible only in load mode) + + cd /sys/class/leds/lp5523:channel2/device + echo "load" > engine3_mode + echo "9d80400004ff05ff437f0000" > engine3_load + echo "111111111" > engine3_leds + echo "run" > engine3_mode + + To stop the engine: + echo "disabled" > engine3_mode + +2) Firmware interface - LP55xx common interface + For the details, please refer to 'firmware' section in leds-lp55xx.txt Selftest uses always the current from the platform data. diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index eec8fa2ffe4e..82713ff92eb3 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -1,11 +1,11 @@ -LP5521/LP5523/LP55231 Common Driver -=================================== +LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver +================================================= Authors: Milo(Woogyom) Kim <milo.kim@ti.com> Description ----------- -LP5521, LP5523/55231 and LP5562 have common features as below. +LP5521, LP5523/55231, LP5562 and LP8501 have common features as below. 
Register access via the I2C Device initialization/deinitialization @@ -109,6 +109,30 @@ As soon as 'loading' is set to 0, registered callback is called. Inside the callback, the selected engine is loaded and memory is updated. To run programmed pattern, 'run_engine' attribute should be enabled. +The pattern sequence of LP8501 is the same as LP5523. +However pattern data is specific. +Ex 1) Engine 1 is used +echo 1 > /sys/bus/i2c/devices/xxxx/select_engine +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + +Ex 2) Engine 2 and 3 are used at the same time +echo 2 > /sys/bus/i2c/devices/xxxx/select_engine +sleep 1 +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +sleep 1 +echo 3 > /sys/bus/i2c/devices/xxxx/select_engine +sleep 1 +echo 1 > /sys/class/firmware/lp8501/loading +echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data +echo 0 > /sys/class/firmware/lp8501/loading +sleep 1 +echo 1 > /sys/class/leds/d1/device/run_engine + ( 'run_engine' and 'firmware_cb' ) The sequence of running the program data is common. But each device has own specific register addresses for commands. diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index d529e02d928d..f14f49304222 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -66,9 +66,7 @@ rq->cfs.load value, which is the sum of the weights of the tasks queued on the runqueue. CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the -p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to -account for possible wraparounds). CFS picks the "leftmost" task from this -tree and sticks to it. 
+p->se.vruntime key. CFS picks the "leftmost" task from this tree and sticks to it. As the system progresses forwards, the executed tasks are put into the tree more and more to the right --- slowly but surely giving a chance for every task to become the "leftmost task" and thus get on the CPU within a deterministic diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas index cc92ca8c8963..6edaa65b0818 100644 --- a/Documentation/scsi/ChangeLog.megaraid_sas +++ b/Documentation/scsi/ChangeLog.megaraid_sas @@ -1,3 +1,13 @@ +Release Date : Sat. Aug 31, 2013 17:00:00 PST 2013 - + (emaild-id:megaraidlinux@lsi.com) + Adam Radford + Kashyap Desai + Sumit Saxena +Current Version : 06.700.06.00-rc1 +Old Version : 06.600.18.00-rc1 + 1. Add High Availability clustering support using shared Logical Disks. + 2. Version and Changelog update. +------------------------------------------------------------------------------- Release Date : Wed. May 15, 2013 17:00:00 PST 2013 - (emaild-id:megaraidlinux@lsi.com) Adam Radford diff --git a/Documentation/thermal/exynos_thermal b/Documentation/thermal/exynos_thermal index 2b46f67b1ccb..9010c4416967 100644 --- a/Documentation/thermal/exynos_thermal +++ b/Documentation/thermal/exynos_thermal @@ -1,17 +1,17 @@ -Kernel driver exynos4_tmu +Kernel driver exynos_tmu ================= Supported chips: -* ARM SAMSUNG EXYNOS4 series of SoC - Prefix: 'exynos4-tmu' +* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC Datasheet: Not publicly available Authors: Donggeun Kim <dg77.kim@samsung.com> +Authors: Amit Daniel <amit.daniel@samsung.com> -Description ------------ +TMU controller Description: +--------------------------- -This driver allows to read temperature inside SAMSUNG EXYNOS4 series of SoC. +This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC. The chip only exposes the measured 8-bit temperature code value through a register. 
@@ -34,9 +34,9 @@ The three equations are: TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register) Temperature code measured at 85 degree Celsius which is unchanged -TMU(Thermal Management Unit) in EXYNOS4 generates interrupt +TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt when temperature exceeds pre-defined levels. -The maximum number of configurable threshold is four. +The maximum number of configurable thresholds is five. The threshold levels are defined as follows: Level_0: current temperature > trigger_level_0 + threshold Level_1: current temperature > trigger_level_1 + threshold @@ -47,6 +47,31 @@ The threshold levels are defined as follows: through the corresponding registers. When an interrupt occurs, this driver notify kernel thermal framework -with the function exynos4_report_trigger. +with the function exynos_report_trigger. Although an interrupt condition for level_0 can be set, it can be used to synchronize the cooling action. + +TMU driver description: +----------------------- + +The exynos thermal driver is structured as, + + Kernel Core thermal framework + (thermal_core.c, step_wise.c, cpu_cooling.c) + ^ + | + | +TMU configuration data -------> TMU Driver <------> Exynos Core thermal wrapper +(exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c) +(exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h) + +a) TMU configuration data: This consists of TMU register offsets/bitfields + described through structure exynos_tmu_registers. Also several + other platform data (struct exynos_tmu_platform_data) members + are used to configure the TMU. +b) TMU driver: This component initialises the TMU controller and sets different + thresholds. It invokes core thermal implementation with the call + exynos_report_trigger. +c) Exynos Core thermal wrapper: This provides 3 wrapper functions to use the + Kernel core thermal framework. They are exynos_unregister_thermal, + exynos_register_thermal and exynos_report_trigger. 
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index a71bd5b90fe8..87519cb379ee 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -134,6 +134,13 @@ temperature) and throttle appropriate devices. this thermal zone and cdev, for a particular trip point. If nth bit is set, then the cdev and thermal zone are bound for trip point n. + .limits: This is an array of cooling state limits. Must have exactly + 2 * thermal_zone.number_of_trip_points. It is an array consisting + of tuples <lower-state upper-state> of state limits. Each trip + will be associated with one state limit tuple when binding. + A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> + on all trips. These limits are used when binding a cdev to a + trip point. .match: This call back returns success(0) if the 'tz and cdev' need to be bound, as per platform data. 1.4.2 struct thermal_zone_params @@ -142,6 +149,11 @@ temperature) and throttle appropriate devices. This is an optional feature where some platforms can choose not to provide this data. .governor_name: Name of the thermal governor used for this zone + .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface + is required. when no_hwmon == false, a hwmon sysfs interface + will be created. when no_hwmon == true, nothing will be done. + In case the thermal_zone_params is NULL, the hwmon interface + will be created (for backward compatibility). 
.num_tbps: Number of thermal_bind_params entries for this zone .tbp: thermal_bind_params entries @@ -1,8 +1,8 @@ VERSION = 3 -PATCHLEVEL = 11 +PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -NAME = Suicidal Squirrel +EXTRAVERSION = -rc1 +NAME = One Giant Leap for Frogkind # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 082d9b4b5472..35a300d4a9fb 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -7,7 +7,6 @@ config ALPHA select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 0c4132dd3507..98838a05ba6d 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, const struct exception_table_entry *fixup; int fault, si_code = SEGV_MAPERR; siginfo_t info; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (cause > 0 ? FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* As of EV6, a load into $31/$f31 is a prefetch, and never faults (or is suppressed by the PALcode). 
Support that for older CPUs @@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr, if (address >= TASK_SIZE) goto vmalloc_fault; #endif - + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -142,6 +142,7 @@ retry: } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } /* If for any reason at all we couldn't handle the fault, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 68fcbb2d59e2..91dbb2757afd 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -20,7 +20,6 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_GENERIC_HARDIRQS select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h index 6fc1159dfefe..764f1e3ba752 100644 --- a/arch/arc/include/asm/sections.h +++ b/arch/arc/include/asm/sections.h @@ -11,7 +11,6 @@ #include <asm-generic/sections.h> -extern char _int_vec_base_lds[]; extern char __arc_dccm_base[]; extern char __dtb_start[]; diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 2a913f85a747..0f944f024513 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -34,6 +34,9 @@ stext: ; IDENTITY Reg [ 3 2 1 0 ] ; (cpu-id) ^^^ => Zero for UP ARC700 ; => #Core-ID if SMP (Master 0) + ; Note that non-boot CPUs might not land here if halt-on-reset and + ; instead breath life from @first_lines_of_secondary, but we still + ; need to make sure only boot cpu takes this path. 
GET_CPU_ID r5 cmp r5, 0 jnz arc_platform_smp_wait_to_boot @@ -98,6 +101,8 @@ stext: first_lines_of_secondary: + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] SET_CURR_TASK_ON_CPU r0, r1 diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 305b3f866aa7..5fc92455da36 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -24,7 +24,6 @@ * -Needed for each CPU (hence not foldable into init_IRQ) * * what it does ? - * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000 * -Disable all IRQs (on CPU side) * -Optionally, setup the High priority Interrupts as Level 2 IRQs */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b011f8c164a1..2c68bc7e6a78 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -47,10 +47,7 @@ void read_arc_build_cfg_regs(void) READ_BCR(AUX_IDENTITY, cpu->core); cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); - cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); - if (cpu->vec_base == 0) - cpu->vec_base = (unsigned int)_int_vec_base_lds; READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); cpu->uncached_base = uncached_space.start << 24; diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 0fd1f0d515ff..d63f3de0cd5b 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) siginfo_t info; int fault, ret; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * We fault-in kernel-space virtual memory on-demand. 
The @@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -117,12 +118,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -201,10 +202,6 @@ no_context: die("Oops", regs, address); out_of_memory: - if (is_global_init(tsk)) { - yield(); - goto survive; - } up_read(&mm->mmap_sem); if (user_mode(regs)) { diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8a916fcd54b..3f7714d8d2d2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,20 +3,21 @@ config ARM default y select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU + select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD - select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HARDIRQS_SW_RESEND @@ -25,6 +26,7 @@ config ARM select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT + select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -35,7 +37,6 @@ config ARM 
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_GENERIC_DMA_COHERENT - select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING @@ -47,6 +48,7 @@ config ARM select HAVE_KPROBES if !XIP_KERNEL select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK + select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API @@ -54,15 +56,14 @@ config ARM select HAVE_UID16 select IRQ_FORCED_THREADING select KTIME_SCALAR + select MODULES_USE_ELF_REL + select OLD_SIGACTION + select OLD_SIGSUSPEND3 select PERF_USE_VMALLOC select RTC_LIB select SYS_SUPPORTS_APM_EMULATION - select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND - select MODULES_USE_ELF_REL - select CLONE_BACKWARDS - select OLD_SIGSUSPEND3 - select OLD_SIGACTION - select HAVE_CONTEXT_TRACKING + # Above selects are sorted alphabetically; please add new ones + # according to that. Thanks. help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -386,8 +387,8 @@ config ARCH_GEMINI bool "Cortina Systems Gemini" select ARCH_REQUIRE_GPIOLIB select ARCH_USES_GETTIMEOFFSET - select NEED_MACH_GPIO_H select CPU_FA526 + select NEED_MACH_GPIO_H help Support for the Cortina Systems Gemini family SoCs @@ -487,8 +488,8 @@ config ARCH_IXP4XX select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI select NEED_MACH_IO_H - select USB_EHCI_BIG_ENDIAN_MMIO select USB_EHCI_BIG_ENDIAN_DESC + select USB_EHCI_BIG_ENDIAN_MMIO help Support for Intel's IXP4XX (XScale) family of processors. 
@@ -498,11 +499,11 @@ config ARCH_DOVE select CPU_PJ4 select GENERIC_CLOCKEVENTS select MIGHT_HAVE_PCI + select MVEBU_MBUS select PINCTRL select PINCTRL_DOVE select PLAT_ORION_LEGACY select USB_ARCH_HAS_EHCI - select MVEBU_MBUS help Support for the Marvell Dove SoC 88AP510 @@ -512,12 +513,12 @@ config ARCH_KIRKWOOD select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PCI_QUIRKS select PINCTRL select PINCTRL_KIRKWOOD select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell Kirkwood series SoCs: 88F6180, 88F6192 and 88F6281. @@ -527,9 +528,9 @@ config ARCH_MV78XX0 select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell MV78xx0 series SoCs: MV781x0, MV782x0. @@ -540,9 +541,9 @@ config ARCH_ORION5X select ARCH_REQUIRE_GPIOLIB select CPU_FEROCEON select GENERIC_CLOCKEVENTS + select MVEBU_MBUS select PCI select PLAT_ORION_LEGACY - select MVEBU_MBUS help Support for the following Marvell Orion 5x series SoCs: Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), @@ -758,8 +759,8 @@ config ARCH_S5P64X0 select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_GPIO_H - select SAMSUNG_WDT_RESET select SAMSUNG_ATAGS + select SAMSUNG_WDT_RESET help Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, SMDK6450. 
@@ -777,8 +778,8 @@ config ARCH_S5PC100 select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_GPIO_H - select SAMSUNG_WDT_RESET select SAMSUNG_ATAGS + select SAMSUNG_WDT_RESET help Samsung S5PC100 series based systems @@ -1619,9 +1620,10 @@ config HZ_FIXED ARCH_S5PV210 || ARCH_EXYNOS4 default AT91_TIMER_HZ if ARCH_AT91 default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE + default 0 choice - depends on !HZ_FIXED + depends on HZ_FIXED = 0 prompt "Timer frequency" config HZ_100 @@ -1646,7 +1648,7 @@ endchoice config HZ int - default HZ_FIXED if HZ_FIXED + default HZ_FIXED if HZ_FIXED != 0 default 100 if HZ_100 default 200 if HZ_200 default 250 if HZ_250 diff --git a/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts index 31b76f08b3ad..15e625eca312 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts @@ -19,6 +19,21 @@ compatible = "cubietech,cubieboard2", "allwinner,sun7i-a20"; soc@01c00000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy1>; + status = "okay"; + }; + + mdio@01c0b080 { + status = "okay"; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + pinctrl@01c20800 { led_pins_cubieboard2: led_pins@0 { allwinner,pins = "PH20", "PH21"; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts index 34a6c02a7c72..9e778557fadb 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts @@ -19,6 +19,21 @@ compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20"; soc@01c00000 { + emac: ethernet@01c0b000 { + pinctrl-names = "default"; + pinctrl-0 = <&emac_pins_a>; + phy = <&phy1>; + status = "okay"; + }; + + mdio@01c0b080 { + status = "okay"; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + pinctrl@01c20800 { led_pins_olinuxino: led_pins@0 { allwinner,pins = "PH2"; diff --git 
a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 999ff45cb77e..80559cbdbc87 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -167,6 +167,22 @@ #size-cells = <1>; ranges; + emac: ethernet@01c0b000 { + compatible = "allwinner,sun4i-emac"; + reg = <0x01c0b000 0x1000>; + interrupts = <0 55 1>; + clocks = <&ahb_gates 17>; + status = "disabled"; + }; + + mdio@01c0b080 { + compatible = "allwinner,sun4i-mdio"; + reg = <0x01c0b080 0x14>; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; + pio: pinctrl@01c20800 { compatible = "allwinner,sun7i-a20-pinctrl"; reg = <0x01c20800 0x400>; @@ -198,6 +214,17 @@ allwinner,drive = <0>; allwinner,pull = <0>; }; + + emac_pins_a: emac0@0 { + allwinner,pins = "PA0", "PA1", "PA2", + "PA3", "PA4", "PA5", "PA6", + "PA7", "PA8", "PA9", "PA10", + "PA11", "PA12", "PA13", "PA14", + "PA15", "PA16"; + allwinner,function = "emac"; + allwinner,drive = <0>; + allwinner,pull = <0>; + }; }; timer@01c20c00 { diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index 023ee63827a2..e901d0f3e0bb 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -166,7 +166,8 @@ static int sp804_set_next_event(unsigned long next, } static struct clock_event_device sp804_clockevent = { - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DYNIRQ, .set_mode = sp804_set_mode, .set_next_event = sp804_set_next_event, .rating = 300, diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index aaf3a8731136..bd454b09133e 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -49,5 +49,5 @@ $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon CFLAGS_xor-neon.o += $(NEON_FLAGS) - lib-$(CONFIG_XOR_BLOCKS) += xor-neon.o + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif 
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index f485e5a2af4b..2c40aeab3eaa 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -9,6 +9,9 @@ */ #include <linux/raid/xor.h> +#include <linux/module.h> + +MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ #error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' @@ -40,3 +43,4 @@ struct xor_block_template const xor_block_neon_inner = { .do_4 = xor_8regs_4, .do_5 = xor_8regs_5, }; +EXPORT_SYMBOL(xor_block_neon_inner); diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index df8612fbbc9c..3f12b885c083 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -281,7 +281,7 @@ static AMBA_APB_DEVICE(uart1, "apb:uart1", 0x00041010, EP93XX_UART1_PHYS_BASE, { IRQ_EP93XX_UART1 }, &ep93xx_uart_data); static AMBA_APB_DEVICE(uart2, "apb:uart2", 0x00041010, EP93XX_UART2_PHYS_BASE, - { IRQ_EP93XX_UART2 }, &ep93xx_uart_data); + { IRQ_EP93XX_UART2 }, NULL); static AMBA_APB_DEVICE(uart3, "apb:uart3", 0x00041010, EP93XX_UART3_PHYS_BASE, { IRQ_EP93XX_UART3 }, &ep93xx_uart_data); diff --git a/arch/arm/mach-mvebu/armada-370-xp.c b/arch/arm/mach-mvebu/armada-370-xp.c index 829b57306328..e2acff98e750 100644 --- a/arch/arm/mach-mvebu/armada-370-xp.c +++ b/arch/arm/mach-mvebu/armada-370-xp.c @@ -18,7 +18,7 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/io.h> -#include <linux/time-armada-370-xp.h> +#include <linux/clocksource.h> #include <linux/dma-mapping.h> #include <linux/mbus.h> #include <asm/hardware/cache-l2x0.h> @@ -37,7 +37,7 @@ static void __init armada_370_xp_map_io(void) static void __init armada_370_xp_timer_and_clk_init(void) { of_clk_init(NULL); - armada_370_xp_timer_init(); + clocksource_of_init(); coherency_init(); BUG_ON(mvebu_mbus_dt_init()); #ifdef CONFIG_CACHE_L2X0 diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index 4872939cdba2..ffb6f0ac7606 100644 --- 
a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -96,7 +96,6 @@ static struct resource mmcif1_resources[] __initdata = { static struct sh_eth_plat_data ether_pdata __initdata = { .phy = 0x1, .edmac_endian = EDMAC_LITTLE_ENDIAN, - .register_type = SH_ETH_REG_FAST_RCAR, .phy_interface = PHY_INTERFACE_MODE_RMII, .ether_link_active_low = 1, }; diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index b5b2f787da2e..ecd0148ee1e1 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -691,8 +691,8 @@ void __init __weak r8a7779_register_twd(void) { } void __init r8a7779_earlytimer_init(void) { r8a7779_clock_init(); - shmobile_earlytimer_init(); r8a7779_register_twd(); + shmobile_earlytimer_init(); } void __init r8a7779_add_early_devices(void) diff --git a/arch/arm/mach-versatile/include/mach/platform.h b/arch/arm/mach-versatile/include/mach/platform.h index ec087407b163..6f938ccb0c54 100644 --- a/arch/arm/mach-versatile/include/mach/platform.h +++ b/arch/arm/mach-versatile/include/mach/platform.h @@ -231,12 +231,14 @@ /* PCI space */ #define VERSATILE_PCI_BASE 0x41000000 /* PCI Interface */ #define VERSATILE_PCI_CFG_BASE 0x42000000 +#define VERSATILE_PCI_IO_BASE 0x43000000 #define VERSATILE_PCI_MEM_BASE0 0x44000000 #define VERSATILE_PCI_MEM_BASE1 0x50000000 #define VERSATILE_PCI_MEM_BASE2 0x60000000 /* Sizes of above maps */ #define VERSATILE_PCI_BASE_SIZE 0x01000000 #define VERSATILE_PCI_CFG_BASE_SIZE 0x02000000 +#define VERSATILE_PCI_IO_BASE_SIZE 0x01000000 #define VERSATILE_PCI_MEM_BASE0_SIZE 0x0c000000 /* 32Mb */ #define VERSATILE_PCI_MEM_BASE1_SIZE 0x10000000 /* 256Mb */ #define VERSATILE_PCI_MEM_BASE2_SIZE 0x10000000 /* 256Mb */ diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index e92e5e0705bc..c97be4ea76d2 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -43,9 +43,9 @@ #define PCI_IMAP0 
__IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) #define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) #define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c) #define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 @@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = { .write = versatile_write_config, }; -static struct resource io_mem = { - .name = "PCI I/O space", +static struct resource unused_mem = { + .name = "PCI unused", .start = VERSATILE_PCI_MEM_BASE0, .end = VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1, .flags = IORESOURCE_MEM, @@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) { int ret = 0; - ret = request_resource(&iomem_resource, &io_mem); + ret = request_resource(&iomem_resource, &unused_mem); if (ret) { - printk(KERN_ERR "PCI: unable to allocate I/O " + printk(KERN_ERR "PCI: unable to allocate unused " "memory region (%d)\n", ret); goto out; } @@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) if (ret) { printk(KERN_ERR "PCI: unable to allocate non-prefetchable " "memory region (%d)\n", ret); - goto release_io_mem; + goto release_unused_mem; } ret = request_resource(&iomem_resource, &pre_mem); if (ret) { @@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys) release_non_mem: release_resource(&non_mem); - release_io_mem: - release_resource(&io_mem); + release_unused_mem: + release_resource(&unused_mem); out: return ret; } @@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) 
goto out; } - ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0); + ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE); if (ret) goto out; @@ -295,6 +295,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2); /* + * For many years the kernel and QEMU were symbiotically buggy + * in that they both assumed the same broken IRQ mapping. + * QEMU therefore attempts to auto-detect old broken kernels + * so that they still work on newer QEMU as they did on old + * QEMU. Since we now use the correct (ie matching-hardware) + * IRQ mapping we write a definitely different value to a + * PCI_INTERRUPT_LINE register to tell QEMU that we expect + * real hardware behaviour and it need not be backwards + * compatible for us. This write is harmless on real hardware. + */ + __raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE); + + /* * Do not to map Versatile FPGA PCI device into memory space */ pci_slot_ignore |= (1 << myslot); @@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; - /* slot, pin, irq - * 24 1 IRQ_SIC_PCI0 - * 25 1 IRQ_SIC_PCI1 - * 26 1 IRQ_SIC_PCI2 - * 27 1 IRQ_SIC_PCI3 + /* + * Slot INTA INTB INTC INTD + * 31 PCI1 PCI2 PCI3 PCI0 + * 30 PCI0 PCI1 PCI2 PCI3 + * 29 PCI3 PCI0 PCI1 PCI2 */ - irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3); + irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3); return irq; } diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 36ea8247123a..505e64ab3eae 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -7,6 +7,8 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o +CFLAGS_dcscb.o += -march=armv7-a obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += tc2_pm.o spc.o +CFLAGS_tc2_pm.o += -march=armv7-a 
obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c97f7940cb95..eb8830a4c5ed 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - int write = fsr & FSR_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, fsr)) return 0; @@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (fsr & FSR_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -349,6 +352,13 @@ retry: if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -359,13 +369,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we - * have no context to handle this fault with. 
- */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index c83f27b6bdda..3ea02903d75a 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -132,6 +132,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (dev->of_node) { struct of_phandle_args dma_spec; struct device_node *np = dev->of_node; + int ret; /* * FIXME: we should allocate the DMA channel from this @@ -140,14 +141,23 @@ static int pxa_ssp_probe(struct platform_device *pdev) */ /* rx */ - of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 0, &dma_spec); + ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 0, &dma_spec); + + if (ret) { + dev_err(dev, "Can't parse dmas property\n"); + return -ENODEV; + } ssp->drcmr_rx = dma_spec.args[0]; of_node_put(dma_spec.np); /* tx */ - of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 1, &dma_spec); + ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", + 1, &dma_spec); + if (ret) { + dev_err(dev, "Can't parse dmas property\n"); + return -ENODEV; + } ssp->drcmr_tx = dma_spec.args[0]; of_node_put(dma_spec.np); } else { diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ae323a45c28c..c04454876bcb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,7 +23,6 @@ config ARM64 select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_GENERIC_DMA_COHERENT - select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK select HAVE_PERF_EVENTS diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6c8ba25bf6bb..6d6acf153bff 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - if (esr & ESR_LNX_EXEC) { - 
vm_flags = VM_EXEC; - } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { - vm_flags = VM_WRITE; - mm_flags |= FAULT_FLAG_WRITE; - } - tsk = current; mm = tsk->mm; @@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + mm_flags |= FAULT_FLAG_USER; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -288,6 +291,13 @@ retry: VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we have no context to + * handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -298,13 +308,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we have no context to - * handle this fault with. 
- */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to successfully fix up diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 549903cfc2cb..b6878eb64884 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -6,7 +6,6 @@ config AVR32 select HAVE_CLK select HAVE_OPROFILE select HAVE_KPROBES - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_ATOMIC64 diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index b2f2d2d66849..0eca93327195 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) local_irq_enable(); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); @@ -228,9 +230,9 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - pagefault_out_of_memory(); if (!user_mode(regs)) goto no_context; + pagefault_out_of_memory(); return; do_sigbus: diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 3b6abc54b015..f78c9a2c7e28 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -32,7 +32,6 @@ config BLACKFIN select HAVE_UNDERSCORE_SYMBOL_PREFIX select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION - select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE select USE_GENERIC_SMP_HELPERS if SMP diff --git a/arch/blackfin/boot/.gitignore b/arch/blackfin/boot/.gitignore index 229e50808677..1287a5487e7d 100644 --- a/arch/blackfin/boot/.gitignore +++ b/arch/blackfin/boot/.gitignore @@ -1,2 +1,3 @@ vmImage* vmlinux* +uImage* diff --git a/arch/blackfin/include/asm/scb.h b/arch/blackfin/include/asm/scb.h new file mode 100644 index 000000000000..a294cc0d1a4a --- /dev/null +++ b/arch/blackfin/include/asm/scb.h @@ -0,0 +1,21 @@ +/* + * arch/blackfin/mach-common/scb-init.c - reprogram system cross bar priority + * + * Copyright 2012 Analog Devices Inc. 
+ * + * Licensed under the GPL-2 or later. + */ + +#define SCB_SLOT_OFFSET 24 +#define SCB_MI_MAX_SLOT 32 + +struct scb_mi_prio { + unsigned long scb_mi_arbr; + unsigned long scb_mi_arbw; + unsigned char scb_mi_slots; + unsigned char scb_mi_prio[SCB_MI_MAX_SLOT]; +}; + +extern struct scb_mi_prio scb_data[]; + +extern void init_scb(void); diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c index 19ad0637e8ff..396193042127 100644 --- a/arch/blackfin/kernel/setup.c +++ b/arch/blackfin/kernel/setup.c @@ -35,6 +35,9 @@ #ifdef CONFIG_BF60x #include <mach/pm.h> #endif +#ifdef CONFIG_SCB_PRIORITY +#include <asm/scb.h> +#endif u16 _bfin_swrst; EXPORT_SYMBOL(_bfin_swrst); @@ -1101,6 +1104,9 @@ void __init setup_arch(char **cmdline_p) #endif init_exception_vectors(); bfin_cache_init(); /* Initialize caches for the boot CPU */ +#ifdef CONFIG_SCB_PRIORITY + init_scb(); +#endif } static int __init topology_init(void) diff --git a/arch/blackfin/mach-bf609/Kconfig b/arch/blackfin/mach-bf609/Kconfig index 95a4f1b676ce..2bcbf94b1edf 100644 --- a/arch/blackfin/mach-bf609/Kconfig +++ b/arch/blackfin/mach-bf609/Kconfig @@ -59,6 +59,1661 @@ config SEC_IRQ_PRIORITY_LEVELS Divide the total number of interrupt priority levels into sub-levels. There is 2 ^ (SEC_IRQ_PRIORITY_LEVELS + 1) different levels. + +comment "System Cross Bar Priority Assignment" + +config SCB_PRIORITY + bool "Init System Cross Bar Priority" + default n + +menuconfig SCB0_MI0 + bool "SCB0 Master Interface 0 (DDR)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ Core 0 -- 0 + Core 1 -- 2 + SCB1 -- 9 + SCB2 -- 10 + SCB3 -- 11 + SCB4 -- 12 + SCB5 -- 5 + SCB6 -- 6 + SCB7 -- 8 + SCB8 -- 7 + SCB9 -- 4 + USB -- 13 + +if SCB0_MI0 + +config SCB0_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 13 + +config SCB0_MI0_SLOT1 + int "Slot 1 slave interface id" + default 2 + range 0 13 + +config SCB0_MI0_SLOT2 + int "Slot 2 slave interface id" + default 4 + range 0 13 + +config SCB0_MI0_SLOT3 + int "Slot 3 slave interface id" + default 5 + range 0 13 + +config SCB0_MI0_SLOT4 + int "Slot 4 slave interface id" + default 6 + range 0 13 + +config SCB0_MI0_SLOT5 + int "Slot 5 slave interface id" + default 7 + range 0 13 + +config SCB0_MI0_SLOT6 + int "Slot 6 slave interface id" + default 8 + range 0 13 + +config SCB0_MI0_SLOT7 + int "Slot 7 slave interface id" + default 9 + range 0 13 + +config SCB0_MI0_SLOT8 + int "Slot 8 slave interface id" + default 10 + range 0 13 + +config SCB0_MI0_SLOT9 + int "Slot 9 slave interface id" + default 11 + range 0 13 + +config SCB0_MI0_SLOT10 + int "Slot 10 slave interface id" + default 13 + range 0 13 + +config SCB0_MI0_SLOT11 + int "Slot 11 slave interface id" + default 12 + range 0 13 + +config SCB0_MI0_SLOT12 + int "Slot 12 slave interface id" + default 0 + range 0 13 + +config SCB0_MI0_SLOT13 + int "Slot 13 slave interface id" + default 2 + range 0 13 + +config SCB0_MI0_SLOT14 + int "Slot 14 slave interface id" + default 4 + range 0 13 + +config SCB0_MI0_SLOT15 + int "Slot 15 slave interface id" + default 5 + range 0 13 + +config SCB0_MI0_SLOT16 + int "Slot 16 slave interface id" + default 6 + range 0 13 + +config SCB0_MI0_SLOT17 + int "Slot 17 slave interface id" + default 7 + range 0 13 + +config SCB0_MI0_SLOT18 + int "Slot 18 slave interface id" + default 8 + range 0 13 + +config SCB0_MI0_SLOT19 + int "Slot 19 slave interface id" + default 9 + range 0 13 + +config SCB0_MI0_SLOT20 + int "Slot 20 slave interface id" + default 10 + range 0 13 + +config SCB0_MI0_SLOT21 + int "Slot 21 
slave interface id" + default 11 + range 0 13 + +config SCB0_MI0_SLOT22 + int "Slot 22 slave interface id" + default 13 + range 0 13 + +config SCB0_MI0_SLOT23 + int "Slot 23 slave interface id" + default 12 + range 0 13 + +config SCB0_MI0_SLOT24 + int "Slot 24 slave interface id" + default 0 + range 0 13 + +config SCB0_MI0_SLOT25 + int "Slot 25 slave interface id" + default 2 + range 0 13 + +config SCB0_MI0_SLOT26 + int "Slot 26 slave interface id" + default 4 + range 0 13 + +config SCB0_MI0_SLOT27 + int "Slot 27 slave interface id" + default 5 + range 0 13 + +config SCB0_MI0_SLOT28 + int "Slot 28 slave interface id" + default 6 + range 0 13 + +config SCB0_MI0_SLOT29 + int "Slot 29 slave interface id" + default 7 + range 0 13 + +config SCB0_MI0_SLOT30 + int "Slot 30 slave interface id" + default 8 + range 0 13 + +config SCB0_MI0_SLOT31 + int "Slot 31 slave interface id" + default 13 + range 0 13 + +endif # SCB0_MI0 + +menuconfig SCB0_MI1 + bool "SCB0 Master Interface 1 (SMC)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ Core 0 -- 0 + Core 1 -- 2 + SCB1 -- 9 + SCB2 -- 10 + SCB3 -- 11 + SCB4 -- 12 + SCB5 -- 5 + SCB6 -- 6 + SCB7 -- 8 + SCB8 -- 7 + SCB9 -- 4 + USB -- 13 + +if SCB0_MI1 + +config SCB0_MI1_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 13 + +config SCB0_MI1_SLOT1 + int "Slot 1 slave interface id" + default 2 + range 0 13 + +config SCB0_MI1_SLOT2 + int "Slot 2 slave interface id" + default 4 + range 0 13 + +config SCB0_MI1_SLOT3 + int "Slot 3 slave interface id" + default 5 + range 0 13 + +config SCB0_MI1_SLOT4 + int "Slot 4 slave interface id" + default 6 + range 0 13 + +config SCB0_MI1_SLOT5 + int "Slot 5 slave interface id" + default 7 + range 0 13 + +config SCB0_MI1_SLOT6 + int "Slot 6 slave interface id" + default 8 + range 0 13 + +config SCB0_MI1_SLOT7 + int "Slot 7 slave interface id" + default 9 + range 0 13 + +config SCB0_MI1_SLOT8 + int "Slot 8 slave interface id" + default 10 + range 0 13 + +config SCB0_MI1_SLOT9 + int "Slot 9 slave interface id" + default 11 + range 0 13 + +config SCB0_MI1_SLOT10 + int "Slot 10 slave interface id" + default 13 + range 0 13 + +config SCB0_MI1_SLOT11 + int "Slot 11 slave interface id" + default 12 + range 0 13 + +config SCB0_MI1_SLOT12 + int "Slot 12 slave interface id" + default 0 + range 0 13 + +config SCB0_MI1_SLOT13 + int "Slot 13 slave interface id" + default 2 + range 0 13 + +config SCB0_MI1_SLOT14 + int "Slot 14 slave interface id" + default 4 + range 0 13 + +config SCB0_MI1_SLOT15 + int "Slot 15 slave interface id" + default 5 + range 0 13 + +config SCB0_MI1_SLOT16 + int "Slot 16 slave interface id" + default 6 + range 0 13 + +config SCB0_MI1_SLOT17 + int "Slot 17 slave interface id" + default 7 + range 0 13 + +config SCB0_MI1_SLOT18 + int "Slot 18 slave interface id" + default 8 + range 0 13 + +config SCB0_MI1_SLOT19 + int "Slot 19 slave interface id" + default 9 + range 0 13 + +config SCB0_MI1_SLOT20 + int "Slot 20 slave interface id" + default 10 + range 0 13 + +config SCB0_MI1_SLOT21 + int "Slot 21 
slave interface id" + default 11 + range 0 13 + +config SCB0_MI1_SLOT22 + int "Slot 22 slave interface id" + default 13 + range 0 13 + +config SCB0_MI1_SLOT23 + int "Slot 23 slave interface id" + default 12 + range 0 13 + +config SCB0_MI1_SLOT24 + int "Slot 24 slave interface id" + default 0 + range 0 13 + +config SCB0_MI1_SLOT25 + int "Slot 25 slave interface id" + default 2 + range 0 13 + +config SCB0_MI1_SLOT26 + int "Slot 26 slave interface id" + default 4 + range 0 13 + +config SCB0_MI1_SLOT27 + int "Slot 27 slave interface id" + default 5 + range 0 13 + +config SCB0_MI1_SLOT28 + int "Slot 28 slave interface id" + default 6 + range 0 13 + +config SCB0_MI1_SLOT29 + int "Slot 29 slave interface id" + default 7 + range 0 13 + +config SCB0_MI1_SLOT30 + int "Slot 30 slave interface id" + default 8 + range 0 13 + +config SCB0_MI1_SLOT31 + int "Slot 31 slave interface id" + default 13 + range 0 13 + +endif # SCB0_MI1 + +menuconfig SCB0_MI2 + bool "SCB0 Master Interface 2 (Data L2)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ Core 0 -- 0 + Core 1 -- 2 + SCB1 -- 9 + SCB2 -- 10 + SCB3 -- 11 + SCB4 -- 12 + SCB5 -- 5 + SCB6 -- 6 + SCB7 -- 8 + SCB8 -- 7 + SCB9 -- 4 + USB -- 13 + +if SCB0_MI2 + +config SCB0_MI2_SLOT0 + int "Slot 0 slave interface id" + default 4 + range 0 13 + +config SCB0_MI2_SLOT1 + int "Slot 1 slave interface id" + default 5 + range 0 13 + +config SCB0_MI2_SLOT2 + int "Slot 2 slave interface id" + default 6 + range 0 13 + +config SCB0_MI2_SLOT3 + int "Slot 3 slave interface id" + default 7 + range 0 13 + +config SCB0_MI2_SLOT4 + int "Slot 4 slave interface id" + default 8 + range 0 13 + +config SCB0_MI2_SLOT5 + int "Slot 5 slave interface id" + default 9 + range 0 13 + +config SCB0_MI2_SLOT6 + int "Slot 6 slave interface id" + default 10 + range 0 13 + +config SCB0_MI2_SLOT7 + int "Slot 7 slave interface id" + default 11 + range 0 13 + +config SCB0_MI2_SLOT8 + int "Slot 8 slave interface id" + default 13 + range 0 13 + +config SCB0_MI2_SLOT9 + int "Slot 9 slave interface id" + default 12 + range 0 13 + +config SCB0_MI2_SLOT10 + int "Slot 10 slave interface id" + default 4 + range 0 13 + +config SCB0_MI2_SLOT11 + int "Slot 11 slave interface id" + default 5 + range 0 13 + +config SCB0_MI2_SLOT12 + int "Slot 12 slave interface id" + default 6 + range 0 13 + +config SCB0_MI2_SLOT13 + int "Slot 13 slave interface id" + default 7 + range 0 13 + +config SCB0_MI2_SLOT14 + int "Slot 14 slave interface id" + default 8 + range 0 13 + +config SCB0_MI2_SLOT15 + int "Slot 15 slave interface id" + default 9 + range 0 13 + +config SCB0_MI2_SLOT16 + int "Slot 16 slave interface id" + default 10 + range 0 13 + +config SCB0_MI2_SLOT17 + int "Slot 17 slave interface id" + default 11 + range 0 13 + +config SCB0_MI2_SLOT18 + int "Slot 18 slave interface id" + default 13 + range 0 13 + +config SCB0_MI2_SLOT19 + int "Slot 19 slave interface id" + default 12 + range 0 13 + +config SCB0_MI2_SLOT20 + int "Slot 20 slave interface id" + default 4 + range 0 13 + +config SCB0_MI2_SLOT21 + int "Slot 
21 slave interface id" + default 5 + range 0 13 + +config SCB0_MI2_SLOT22 + int "Slot 22 slave interface id" + default 6 + range 0 13 + +config SCB0_MI2_SLOT23 + int "Slot 23 slave interface id" + default 7 + range 0 13 + +config SCB0_MI2_SLOT24 + int "Slot 24 slave interface id" + default 8 + range 0 13 + +config SCB0_MI2_SLOT25 + int "Slot 25 slave interface id" + default 9 + range 0 13 + +config SCB0_MI2_SLOT26 + int "Slot 26 slave interface id" + default 10 + range 0 13 + +config SCB0_MI2_SLOT27 + int "Slot 27 slave interface id" + default 11 + range 0 13 + +config SCB0_MI2_SLOT28 + int "Slot 28 slave interface id" + default 13 + range 0 13 + +config SCB0_MI2_SLOT29 + int "Slot 29 slave interface id" + default 12 + range 0 13 + +config SCB0_MI2_SLOT30 + int "Slot 30 slave interface id" + default 4 + range 0 13 + +config SCB0_MI2_SLOT31 + int "Slot 31 slave interface id" + default 7 + range 0 13 + +endif # SCB0_MI2 + +menuconfig SCB0_MI3 + bool "SCB0 Master Interface 3 (L1A)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ Core 0 -- 0 + Core 1 -- 2 + SCB1 -- 9 + SCB2 -- 10 + SCB3 -- 11 + SCB4 -- 12 + SCB5 -- 5 + SCB6 -- 6 + SCB7 -- 8 + SCB8 -- 7 + SCB9 -- 4 + USB -- 13 + +if SCB0_MI3 + +config SCB0_MI3_SLOT0 + int "Slot 0 slave interface id" + default 4 + range 0 13 + +config SCB0_MI3_SLOT1 + int "Slot 1 slave interface id" + default 5 + range 0 13 + +config SCB0_MI3_SLOT2 + int "Slot 2 slave interface id" + default 6 + range 0 13 + +config SCB0_MI3_SLOT3 + int "Slot 3 slave interface id" + default 7 + range 0 13 + +config SCB0_MI3_SLOT4 + int "Slot 4 slave interface id" + default 8 + range 0 13 + +config SCB0_MI3_SLOT5 + int "Slot 5 slave interface id" + default 9 + range 0 13 + +config SCB0_MI3_SLOT6 + int "Slot 6 slave interface id" + default 10 + range 0 13 + +config SCB0_MI3_SLOT7 + int "Slot 7 slave interface id" + default 11 + range 0 13 + +config SCB0_MI3_SLOT8 + int "Slot 8 slave interface id" + default 13 + range 0 13 + +config SCB0_MI3_SLOT9 + int "Slot 9 slave interface id" + default 12 + range 0 13 + +config SCB0_MI3_SLOT10 + int "Slot 10 slave interface id" + default 4 + range 0 13 + +config SCB0_MI3_SLOT11 + int "Slot 11 slave interface id" + default 5 + range 0 13 + +config SCB0_MI3_SLOT12 + int "Slot 12 slave interface id" + default 6 + range 0 13 + +config SCB0_MI3_SLOT13 + int "Slot 13 slave interface id" + default 7 + range 0 13 + +config SCB0_MI3_SLOT14 + int "Slot 14 slave interface id" + default 8 + range 0 13 + +config SCB0_MI3_SLOT15 + int "Slot 15 slave interface id" + default 9 + range 0 13 + +config SCB0_MI3_SLOT16 + int "Slot 16 slave interface id" + default 10 + range 0 13 + +config SCB0_MI3_SLOT17 + int "Slot 17 slave interface id" + default 11 + range 0 13 + +config SCB0_MI3_SLOT18 + int "Slot 18 slave interface id" + default 13 + range 0 13 + +config SCB0_MI3_SLOT19 + int "Slot 19 slave interface id" + default 12 + range 0 13 + +config SCB0_MI3_SLOT20 + int "Slot 20 slave interface id" + default 4 + range 0 13 + +config SCB0_MI3_SLOT21 + int "Slot 
21 slave interface id" + default 5 + range 0 13 + +config SCB0_MI3_SLOT22 + int "Slot 22 slave interface id" + default 6 + range 0 13 + +config SCB0_MI3_SLOT23 + int "Slot 23 slave interface id" + default 7 + range 0 13 + +config SCB0_MI3_SLOT24 + int "Slot 24 slave interface id" + default 8 + range 0 13 + +config SCB0_MI3_SLOT25 + int "Slot 25 slave interface id" + default 9 + range 0 13 + +config SCB0_MI3_SLOT26 + int "Slot 26 slave interface id" + default 10 + range 0 13 + +config SCB0_MI3_SLOT27 + int "Slot 27 slave interface id" + default 11 + range 0 13 + +config SCB0_MI3_SLOT28 + int "Slot 28 slave interface id" + default 13 + range 0 13 + +config SCB0_MI3_SLOT29 + int "Slot 29 slave interface id" + default 12 + range 0 13 + +config SCB0_MI3_SLOT30 + int "Slot 30 slave interface id" + default 4 + range 0 13 + +config SCB0_MI3_SLOT31 + int "Slot 31 slave interface id" + default 7 + range 0 13 + +endif # SCB0_MI3 + +menuconfig SCB0_MI4 + bool "SCB0 Master Interface 4 (L1B)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ Core 0 -- 0 + Core 1 -- 2 + SCB1 -- 9 + SCB2 -- 10 + SCB3 -- 11 + SCB4 -- 12 + SCB5 -- 5 + SCB6 -- 6 + SCB7 -- 8 + SCB8 -- 7 + SCB9 -- 4 + USB -- 13 + +if SCB0_MI4 + +config SCB0_MI4_SLOT0 + int "Slot 0 slave interface id" + default 4 + range 0 13 + +config SCB0_MI4_SLOT1 + int "Slot 1 slave interface id" + default 5 + range 0 13 + +config SCB0_MI4_SLOT2 + int "Slot 2 slave interface id" + default 6 + range 0 13 + +config SCB0_MI4_SLOT3 + int "Slot 3 slave interface id" + default 7 + range 0 13 + +config SCB0_MI4_SLOT4 + int "Slot 4 slave interface id" + default 8 + range 0 13 + +config SCB0_MI4_SLOT5 + int "Slot 5 slave interface id" + default 9 + range 0 13 + +config SCB0_MI4_SLOT6 + int "Slot 6 slave interface id" + default 10 + range 0 13 + +config SCB0_MI4_SLOT7 + int "Slot 7 slave interface id" + default 11 + range 0 13 + +config SCB0_MI4_SLOT8 + int "Slot 8 slave interface id" + default 13 + range 0 13 + +config SCB0_MI4_SLOT9 + int "Slot 9 slave interface id" + default 12 + range 0 13 + +config SCB0_MI4_SLOT10 + int "Slot 10 slave interface id" + default 4 + range 0 13 + +config SCB0_MI4_SLOT11 + int "Slot 11 slave interface id" + default 5 + range 0 13 + +config SCB0_MI4_SLOT12 + int "Slot 12 slave interface id" + default 6 + range 0 13 + +config SCB0_MI4_SLOT13 + int "Slot 13 slave interface id" + default 7 + range 0 13 + +config SCB0_MI4_SLOT14 + int "Slot 14 slave interface id" + default 8 + range 0 13 + +config SCB0_MI4_SLOT15 + int "Slot 15 slave interface id" + default 9 + range 0 13 + +config SCB0_MI4_SLOT16 + int "Slot 16 slave interface id" + default 10 + range 0 13 + +config SCB0_MI4_SLOT17 + int "Slot 17 slave interface id" + default 11 + range 0 13 + +config SCB0_MI4_SLOT18 + int "Slot 18 slave interface id" + default 13 + range 0 13 + +config SCB0_MI4_SLOT19 + int "Slot 19 slave interface id" + default 12 + range 0 13 + +config SCB0_MI4_SLOT20 + int "Slot 20 slave interface id" + default 4 + range 0 13 + +config SCB0_MI4_SLOT21 + int "Slot 
21 slave interface id" + default 5 + range 0 13 + +config SCB0_MI4_SLOT22 + int "Slot 22 slave interface id" + default 6 + range 0 13 + +config SCB0_MI4_SLOT23 + int "Slot 23 slave interface id" + default 7 + range 0 13 + +config SCB0_MI4_SLOT24 + int "Slot 24 slave interface id" + default 8 + range 0 13 + +config SCB0_MI4_SLOT25 + int "Slot 25 slave interface id" + default 9 + range 0 13 + +config SCB0_MI4_SLOT26 + int "Slot 26 slave interface id" + default 10 + range 0 13 + +config SCB0_MI4_SLOT27 + int "Slot 27 slave interface id" + default 11 + range 0 13 + +config SCB0_MI4_SLOT28 + int "Slot 28 slave interface id" + default 13 + range 0 13 + +config SCB0_MI4_SLOT29 + int "Slot 29 slave interface id" + default 12 + range 0 13 + +config SCB0_MI4_SLOT30 + int "Slot 30 slave interface id" + default 4 + range 0 13 + +config SCB0_MI4_SLOT31 + int "Slot 31 slave interface id" + default 7 + range 0 13 + +endif # SCB0_MI4 + +menuconfig SCB0_MI5 + bool "SCB0 Master Interface 5 (SMMR)" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ MMR0 -- 1 + MMR1 -- 3 + SCB2 -- 10 + SCB4 -- 12 + +if SCB0_MI5 + +config SCB0_MI5_SLOT0 + int "Slot 0 slave interface id" + default 1 + range 0 13 + +config SCB0_MI5_SLOT1 + int "Slot 1 slave interface id" + default 3 + range 0 13 + +config SCB0_MI5_SLOT2 + int "Slot 2 slave interface id" + default 10 + range 0 13 + +config SCB0_MI5_SLOT3 + int "Slot 3 slave interface id" + default 12 + range 0 13 + +config SCB0_MI5_SLOT4 + int "Slot 4 slave interface id" + default 1 + range 0 13 + +config SCB0_MI5_SLOT5 + int "Slot 5 slave interface id" + default 3 + range 0 13 + +config SCB0_MI5_SLOT6 + int "Slot 6 slave interface id" + default 10 + range 0 13 + +config SCB0_MI5_SLOT7 + int "Slot 7 slave interface id" + default 12 + range 0 13 + +config SCB0_MI5_SLOT8 + int "Slot 8 slave interface id" + default 1 + range 0 13 + +config SCB0_MI5_SLOT9 + int "Slot 9 slave interface id" + default 3 + range 0 13 + +config SCB0_MI5_SLOT10 + int "Slot 10 slave interface id" + default 10 + range 0 13 + +config SCB0_MI5_SLOT11 + int "Slot 11 slave interface id" + default 12 + range 0 13 + +config SCB0_MI5_SLOT12 + int "Slot 12 slave interface id" + default 1 + range 0 13 + +config SCB0_MI5_SLOT13 + int "Slot 13 slave interface id" + default 3 + range 0 13 + +config SCB0_MI5_SLOT14 + int "Slot 14 slave interface id" + default 10 + range 0 13 + +config SCB0_MI5_SLOT15 + int "Slot 15 slave interface id" + default 12 + range 0 13 + +endif # SCB0_MI5 + +menuconfig SCB1_MI0 + bool "SCB1 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ SPORT0A -- 0 + SPORT0B -- 1 + SPORT1A -- 2 + SPORT1B -- 3 + SPORT2A -- 4 + SPORT2B -- 5 + SPI0TX -- 6 + SPI0RX -- 7 + SPI1TX -- 8 + SPI1RX -- 9 + +if SCB1_MI0 + +config SCB1_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 9 + +config SCB1_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 9 + +config SCB1_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 9 + +config SCB1_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 9 + +config SCB1_MI0_SLOT4 + int "Slot 4 slave interface id" + default 4 + range 0 9 + +config SCB1_MI0_SLOT5 + int "Slot 5 slave interface id" + default 5 + range 0 9 + +config SCB1_MI0_SLOT6 + int "Slot 6 slave interface id" + default 6 + range 0 9 + +config SCB1_MI0_SLOT7 + int "Slot 7 slave interface id" + default 7 + range 0 9 + +config SCB1_MI0_SLOT8 + int "Slot 8 slave interface id" + default 8 + range 0 9 + +config SCB1_MI0_SLOT9 + int "Slot 9 slave interface id" + default 9 + range 0 9 + +config SCB1_MI0_SLOT10 + int "Slot 10 slave interface id" + default 0 + range 0 9 + +config SCB1_MI0_SLOT11 + int "Slot 11 slave interface id" + default 1 + range 0 9 + +config SCB1_MI0_SLOT12 + int "Slot 12 slave interface id" + default 2 + range 0 9 + +config SCB1_MI0_SLOT13 + int "Slot 13 slave interface id" + default 3 + range 0 9 + +config SCB1_MI0_SLOT14 + int "Slot 14 slave interface id" + default 4 + range 0 9 + +config SCB1_MI0_SLOT15 + int "Slot 15 slave interface id" + default 5 + range 0 9 + +config SCB1_MI0_SLOT16 + int "Slot 16 slave interface id" + default 6 + range 0 13 + +config SCB1_MI0_SLOT17 + int "Slot 17 slave interface id" + default 7 + range 0 13 + +config SCB1_MI0_SLOT18 + int "Slot 18 slave interface id" + default 8 + range 0 13 + +config SCB1_MI0_SLOT19 + int "Slot 19 slave interface id" + default 9 + range 0 13 + +endif # SCB1_MI0 + +menuconfig SCB2_MI0 + bool "SCB2 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each 
slot should be set according following table. + RSI -- 0 + SDU DMA -- 1 + SDU -- 2 + EMAC0 -- 3 + EMAC1 -- 4 + +if SCB2_MI0 + +config SCB2_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 4 + +config SCB2_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 4 + +config SCB2_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 4 + +config SCB2_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 4 + +config SCB2_MI0_SLOT4 + int "Slot 4 slave interface id" + default 4 + range 0 4 + +config SCB2_MI0_SLOT5 + int "Slot 5 slave interface id" + default 0 + range 0 4 + +config SCB2_MI0_SLOT6 + int "Slot 6 slave interface id" + default 1 + range 0 4 + +config SCB2_MI0_SLOT7 + int "Slot 7 slave interface id" + default 2 + range 0 4 + +config SCB2_MI0_SLOT8 + int "Slot 8 slave interface id" + default 3 + range 0 4 + +config SCB2_MI0_SLOT9 + int "Slot 9 slave interface id" + default 4 + range 0 4 + +endif # SCB2_MI0 + +menuconfig SCB3_MI0 + bool "SCB3 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ LP0 -- 0 + LP1 -- 1 + LP2 -- 2 + LP3 -- 3 + UART0TX -- 4 + UART0RX -- 5 + UART1TX -- 4 + UART1RX -- 5 + +if SCB3_MI0 + +config SCB3_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 7 + +config SCB3_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 7 + +config SCB3_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 7 + +config SCB3_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 7 + +config SCB3_MI0_SLOT4 + int "Slot 4 slave interface id" + default 4 + range 0 7 + +config SCB3_MI0_SLOT5 + int "Slot 5 slave interface id" + default 5 + range 0 7 + +config SCB3_MI0_SLOT6 + int "Slot 6 slave interface id" + default 6 + range 0 7 + +config SCB3_MI0_SLOT7 + int "Slot 7 slave interface id" + default 7 + range 0 7 + +config SCB3_MI0_SLOT8 + int "Slot 8 slave interface id" + default 0 + range 0 7 + +config SCB3_MI0_SLOT9 + int "Slot 9 slave interface id" + default 1 + range 0 7 + +config SCB3_MI0_SLOT10 + int "Slot 10 slave interface id" + default 2 + range 0 7 + +config SCB3_MI0_SLOT11 + int "Slot 11 slave interface id" + default 3 + range 0 7 + +config SCB3_MI0_SLOT12 + int "Slot 12 slave interface id" + default 4 + range 0 7 + +config SCB3_MI0_SLOT13 + int "Slot 13 slave interface id" + default 5 + range 0 7 + +config SCB3_MI0_SLOT14 + int "Slot 14 slave interface id" + default 6 + range 0 7 + +config SCB3_MI0_SLOT15 + int "Slot 15 slave interface id" + default 7 + range 0 7 + +endif # SCB3_MI0 + +menuconfig SCB4_MI0 + bool "SCB4 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ MDA21 -- 0 + MDA22 -- 1 + MDA23 -- 2 + MDA24 -- 3 + MDA25 -- 4 + MDA26 -- 5 + MDA27 -- 6 + MDA28 -- 7 + +if SCB4_MI0 + +config SCB4_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 7 + +config SCB4_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 7 + +config SCB4_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 7 + +config SCB4_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 7 + +config SCB4_MI0_SLOT4 + int "Slot 4 slave interface id" + default 4 + range 0 7 + +config SCB4_MI0_SLOT5 + int "Slot 5 slave interface id" + default 5 + range 0 7 + +config SCB4_MI0_SLOT6 + int "Slot 6 slave interface id" + default 6 + range 0 7 + +config SCB4_MI0_SLOT7 + int "Slot 7 slave interface id" + default 7 + range 0 7 + +config SCB4_MI0_SLOT8 + int "Slot 8 slave interface id" + default 0 + range 0 7 + +config SCB4_MI0_SLOT9 + int "Slot 9 slave interface id" + default 1 + range 0 7 + +config SCB4_MI0_SLOT10 + int "Slot 10 slave interface id" + default 2 + range 0 7 + +config SCB4_MI0_SLOT11 + int "Slot 11 slave interface id" + default 3 + range 0 7 + +config SCB4_MI0_SLOT12 + int "Slot 12 slave interface id" + default 4 + range 0 7 + +config SCB4_MI0_SLOT13 + int "Slot 13 slave interface id" + default 5 + range 0 7 + +config SCB4_MI0_SLOT14 + int "Slot 14 slave interface id" + default 6 + range 0 7 + +config SCB4_MI0_SLOT15 + int "Slot 15 slave interface id" + default 7 + range 0 7 + +endif # SCB4_MI0 + +menuconfig SCB5_MI0 + bool "SCB5 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ PPI0 MDA29 -- 0 + PPI0 MDA30 -- 1 + PPI2 MDA31 -- 2 + PPI2 MDA32 -- 3 + +if SCB5_MI0 + +config SCB5_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 3 + +config SCB5_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 3 + +config SCB5_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 3 + +config SCB5_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 3 + +config SCB5_MI0_SLOT4 + int "Slot 4 slave interface id" + default 0 + range 0 3 + +config SCB5_MI0_SLOT5 + int "Slot 5 slave interface id" + default 1 + range 0 3 + +config SCB5_MI0_SLOT6 + int "Slot 6 slave interface id" + default 2 + range 0 3 + +config SCB5_MI0_SLOT7 + int "Slot 7 slave interface id" + default 3 + range 0 3 + +endif # SCB5_MI0 + +menuconfig SCB6_MI0 + bool "SCB6 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. + PPI1 MDA33 -- 0 + PPI1 MDA34 -- 1 + +if SCB6_MI0 + +config SCB6_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 1 + +config SCB6_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 1 + +config SCB6_MI0_SLOT2 + int "Slot 2 slave interface id" + default 0 + range 0 1 + +config SCB6_MI0_SLOT3 + int "Slot 3 slave interface id" + default 1 + range 0 1 + +endif # SCB6_MI0 + +menuconfig SCB7_MI0 + bool "SCB7 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ PIXC0 -- 0 + PIXC1 -- 1 + PIXC2 -- 2 + +if SCB7_MI0 + +config SCB7_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 2 + +config SCB7_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 2 + +config SCB7_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 2 + +config SCB7_MI0_SLOT3 + int "Slot 3 slave interface id" + default 0 + range 0 2 + +config SCB7_MI0_SLOT4 + int "Slot 4 slave interface id" + default 1 + range 0 2 + +config SCB7_MI0_SLOT5 + int "Slot 5 slave interface id" + default 2 + range 0 2 + +endif # SCB7_MI0 + +menuconfig SCB8_MI0 + bool "SCB8 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. + PVP CPDOB -- 0 + PVP CPDOC -- 1 + PVP CPCO -- 2 + PVP CPCI -- 3 + +if SCB8_MI0 + +config SCB8_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 3 + +config SCB8_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 3 + +config SCB8_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 3 + +config SCB8_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 3 + +config SCB8_MI0_SLOT4 + int "Slot 4 slave interface id" + default 0 + range 0 3 + +config SCB8_MI0_SLOT5 + int "Slot 5 slave interface id" + default 1 + range 0 3 + +config SCB8_MI0_SLOT6 + int "Slot 6 slave interface id" + default 2 + range 0 3 + +config SCB8_MI0_SLOT7 + int "Slot 7 slave interface id" + default 3 + range 0 3 + +endif # SCB8_MI0 + +menuconfig SCB9_MI0 + bool "SCB9 Master Interface 0" + default n + depends on SCB_PRIORITY + help + The slave interface id of each slot should be set according following table. 
+ PVP MPDO -- 0 + PVP MPDI -- 1 + PVP MPCO -- 2 + PVP MPCI -- 3 + PVP CPDOA -- 4 + +if SCB9_MI0 + +config SCB9_MI0_SLOT0 + int "Slot 0 slave interface id" + default 0 + range 0 4 + +config SCB9_MI0_SLOT1 + int "Slot 1 slave interface id" + default 1 + range 0 4 + +config SCB9_MI0_SLOT2 + int "Slot 2 slave interface id" + default 2 + range 0 4 + +config SCB9_MI0_SLOT3 + int "Slot 3 slave interface id" + default 3 + range 0 4 + +config SCB9_MI0_SLOT4 + int "Slot 4 slave interface id" + default 4 + range 0 4 + +config SCB9_MI0_SLOT5 + int "Slot 5 slave interface id" + default 0 + range 0 4 + +config SCB9_MI0_SLOT6 + int "Slot 6 slave interface id" + default 1 + range 0 4 + +config SCB9_MI0_SLOT7 + int "Slot 7 slave interface id" + default 2 + range 0 4 + +config SCB9_MI0_SLOT8 + int "Slot 8 slave interface id" + default 3 + range 0 4 + +config SCB9_MI0_SLOT9 + int "Slot 9 slave interface id" + default 4 + range 0 4 + +endif # SCB9_MI0 + endmenu endif diff --git a/arch/blackfin/mach-bf609/Makefile b/arch/blackfin/mach-bf609/Makefile index 234fe1b4bb0e..60ffaf85d303 100644 --- a/arch/blackfin/mach-bf609/Makefile +++ b/arch/blackfin/mach-bf609/Makefile @@ -4,3 +4,4 @@ obj-y := dma.o clock.o ints-priority.o obj-$(CONFIG_PM) += pm.o dpm.o +obj-$(CONFIG_SCB_PRIORITY) += scb.o diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 0bc47231540b..d56a55ad83a7 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -104,6 +104,7 @@ static struct platform_device bfin_rotary_device = { #if defined(CONFIG_STMMAC_ETH) || defined(CONFIG_STMMAC_ETH_MODULE) #include <linux/stmmac.h> +#include <linux/phy.h> static unsigned short pins[] = P_RMII0; @@ -111,11 +112,26 @@ static struct stmmac_mdio_bus_data phy_private_data = { .phy_mask = 1, }; +static struct stmmac_dma_cfg eth_dma_cfg = { + .pbl = 2, +}; + +int stmmac_ptp_clk_init(struct platform_device *pdev) +{ + 
bfin_write32(PADS0_EMAC_PTP_CLKSEL, 0); + return 0; +} + static struct plat_stmmacenet_data eth_private_data = { + .has_gmac = 1, .bus_id = 0, .enh_desc = 1, .phy_addr = 1, .mdio_bus_data = &phy_private_data, + .dma_cfg = &eth_dma_cfg, + .force_thresh_dma_mode = 1, + .interface = PHY_INTERFACE_MODE_RMII, + .init = stmmac_ptp_clk_init, }; static struct platform_device bfin_eth_device = { @@ -1107,6 +1123,81 @@ static struct bfin_display_config bfin_display_data = { }; #endif +#if IS_ENABLED(CONFIG_VIDEO_ADV7343) +#include <media/adv7343.h> + +static struct v4l2_output adv7343_outputs[] = { + { + .index = 0, + .name = "Composite", + .type = V4L2_OUTPUT_TYPE_ANALOG, + .std = V4L2_STD_ALL, + .capabilities = V4L2_OUT_CAP_STD, + }, + { + .index = 1, + .name = "S-Video", + .type = V4L2_OUTPUT_TYPE_ANALOG, + .std = V4L2_STD_ALL, + .capabilities = V4L2_OUT_CAP_STD, + }, + { + .index = 2, + .name = "Component", + .type = V4L2_OUTPUT_TYPE_ANALOG, + .std = V4L2_STD_ALL, + .capabilities = V4L2_OUT_CAP_STD, + }, + +}; + +static struct disp_route adv7343_routes[] = { + { + .output = ADV7343_COMPOSITE_ID, + }, + { + .output = ADV7343_SVIDEO_ID, + }, + { + .output = ADV7343_COMPONENT_ID, + }, +}; + +static struct adv7343_platform_data adv7343_data = { + .mode_config = { + .sleep_mode = false, + .pll_control = false, + .dac_1 = true, + .dac_2 = true, + .dac_3 = true, + .dac_4 = true, + .dac_5 = true, + .dac_6 = true, + }, + .sd_config = { + .sd_dac_out1 = false, + .sd_dac_out2 = false, + }, +}; + +static struct bfin_display_config bfin_display_data = { + .card_name = "BF609", + .outputs = adv7343_outputs, + .num_outputs = ARRAY_SIZE(adv7343_outputs), + .routes = adv7343_routes, + .i2c_adapter_id = 0, + .board_info = { + .type = "adv7343", + .addr = 0x2b, + .platform_data = (void *)&adv7343_data, + }, + .ppi_info = &ppi_info_disp, + .ppi_control = (PACK_EN | DLEN_8 | EPPI_CTL_FS1LO_FS2LO + | EPPI_CTL_POLC3 | EPPI_CTL_BLANKGEN | EPPI_CTL_SYNC2 + | EPPI_CTL_NON656 | EPPI_CTL_DIR), +}; 
+#endif + static struct platform_device bfin_display_device = { .name = "bfin_display", .dev = { diff --git a/arch/blackfin/mach-bf609/clock.c b/arch/blackfin/mach-bf609/clock.c index 437d56c82281..dab8849af884 100644 --- a/arch/blackfin/mach-bf609/clock.c +++ b/arch/blackfin/mach-bf609/clock.c @@ -220,6 +220,12 @@ unsigned long sys_clk_get_rate(struct clk *clk) } } +unsigned long dummy_get_rate(struct clk *clk) +{ + clk->parent->rate = clk_get_rate(clk->parent); + return clk->parent->rate; +} + unsigned long sys_clk_round_rate(struct clk *clk, unsigned long rate) { unsigned long max_rate; @@ -283,6 +289,10 @@ static struct clk_ops sys_clk_ops = { .round_rate = sys_clk_round_rate, }; +static struct clk_ops dummy_clk_ops = { + .get_rate = dummy_get_rate, +}; + static struct clk sys_clkin = { .name = "SYS_CLKIN", .rate = CONFIG_CLKIN_HZ, @@ -364,6 +374,12 @@ static struct clk oclk = { .parent = &pll_clk, }; +static struct clk ethclk = { + .name = "stmmaceth", + .parent = &sclk0, + .ops = &dummy_clk_ops, +}; + static struct clk_lookup bf609_clks[] = { CLK(sys_clkin, NULL, "SYS_CLKIN"), CLK(pll_clk, NULL, "PLLCLK"), @@ -375,6 +391,7 @@ static struct clk_lookup bf609_clks[] = { CLK(sclk1, NULL, "SCLK1"), CLK(dclk, NULL, "DCLK"), CLK(oclk, NULL, "OCLK"), + CLK(ethclk, NULL, "stmmaceth"), }; int __init clk_init(void) diff --git a/arch/blackfin/mach-bf609/include/mach/defBF60x_base.h b/arch/blackfin/mach-bf609/include/mach/defBF60x_base.h index f1a6afae1a71..35caa7bc192c 100644 --- a/arch/blackfin/mach-bf609/include/mach/defBF60x_base.h +++ b/arch/blackfin/mach-bf609/include/mach/defBF60x_base.h @@ -839,6 +839,16 @@ #define PORTG_LOCK 0xFFC03344 /* PORTG Port x GPIO Lock Register */ #define PORTG_REVID 0xFFC0337C /* PORTG Port x GPIO Revision ID */ +/* ================================================== + Pads Controller Registers + ================================================== */ + +/* ========================= + PADS0 + ========================= */ +#define 
PADS0_EMAC_PTP_CLKSEL 0xFFC03404 /* PADS0 Clock Selection for EMAC and PTP */ +#define PADS0_TWI_VSEL 0xFFC03408 /* PADS0 TWI Voltage Selection */ +#define PADS0_PORTS_HYST 0xFFC03440 /* PADS0 Hysteresis Enable Register */ /* ========================= PINT Registers diff --git a/arch/blackfin/mach-bf609/scb.c b/arch/blackfin/mach-bf609/scb.c new file mode 100644 index 000000000000..ac1f07c33594 --- /dev/null +++ b/arch/blackfin/mach-bf609/scb.c @@ -0,0 +1,363 @@ +/* + * arch/blackfin/mach-bf609/scb.c - system cross bar priority table + * + * Copyright 2012 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include <asm/blackfin.h> +#include <asm/scb.h> + +struct scb_mi_prio scb_data[] = { +#ifdef CONFIG_SCB0_MI0 + { REG_SCB0_ARBR0, REG_SCB0_ARBW0, 32, { + CONFIG_SCB0_MI0_SLOT0, + CONFIG_SCB0_MI0_SLOT1, + CONFIG_SCB0_MI0_SLOT2, + CONFIG_SCB0_MI0_SLOT3, + CONFIG_SCB0_MI0_SLOT4, + CONFIG_SCB0_MI0_SLOT5, + CONFIG_SCB0_MI0_SLOT6, + CONFIG_SCB0_MI0_SLOT7, + CONFIG_SCB0_MI0_SLOT8, + CONFIG_SCB0_MI0_SLOT9, + CONFIG_SCB0_MI0_SLOT10, + CONFIG_SCB0_MI0_SLOT11, + CONFIG_SCB0_MI0_SLOT12, + CONFIG_SCB0_MI0_SLOT13, + CONFIG_SCB0_MI0_SLOT14, + CONFIG_SCB0_MI0_SLOT15, + CONFIG_SCB0_MI0_SLOT16, + CONFIG_SCB0_MI0_SLOT17, + CONFIG_SCB0_MI0_SLOT18, + CONFIG_SCB0_MI0_SLOT19, + CONFIG_SCB0_MI0_SLOT20, + CONFIG_SCB0_MI0_SLOT21, + CONFIG_SCB0_MI0_SLOT22, + CONFIG_SCB0_MI0_SLOT23, + CONFIG_SCB0_MI0_SLOT24, + CONFIG_SCB0_MI0_SLOT25, + CONFIG_SCB0_MI0_SLOT26, + CONFIG_SCB0_MI0_SLOT27, + CONFIG_SCB0_MI0_SLOT28, + CONFIG_SCB0_MI0_SLOT29, + CONFIG_SCB0_MI0_SLOT30, + CONFIG_SCB0_MI0_SLOT31 + }, + }, +#endif +#ifdef CONFIG_SCB0_MI1 + { REG_SCB0_ARBR1, REG_SCB0_ARBW1, 32, { + CONFIG_SCB0_MI1_SLOT0, + CONFIG_SCB0_MI1_SLOT1, + CONFIG_SCB0_MI1_SLOT2, + CONFIG_SCB0_MI1_SLOT3, + CONFIG_SCB0_MI1_SLOT4, + CONFIG_SCB0_MI1_SLOT5, + CONFIG_SCB0_MI1_SLOT6, + CONFIG_SCB0_MI1_SLOT7, + CONFIG_SCB0_MI1_SLOT8, + CONFIG_SCB0_MI1_SLOT9, + CONFIG_SCB0_MI1_SLOT10, + 
CONFIG_SCB0_MI1_SLOT11, + CONFIG_SCB0_MI1_SLOT12, + CONFIG_SCB0_MI1_SLOT13, + CONFIG_SCB0_MI1_SLOT14, + CONFIG_SCB0_MI1_SLOT15, + CONFIG_SCB0_MI1_SLOT16, + CONFIG_SCB0_MI1_SLOT17, + CONFIG_SCB0_MI1_SLOT18, + CONFIG_SCB0_MI1_SLOT19, + CONFIG_SCB0_MI1_SLOT20, + CONFIG_SCB0_MI1_SLOT21, + CONFIG_SCB0_MI1_SLOT22, + CONFIG_SCB0_MI1_SLOT23, + CONFIG_SCB0_MI1_SLOT24, + CONFIG_SCB0_MI1_SLOT25, + CONFIG_SCB0_MI1_SLOT26, + CONFIG_SCB0_MI1_SLOT27, + CONFIG_SCB0_MI1_SLOT28, + CONFIG_SCB0_MI1_SLOT29, + CONFIG_SCB0_MI1_SLOT30, + CONFIG_SCB0_MI1_SLOT31 + }, + }, +#endif +#ifdef CONFIG_SCB0_MI2 + { REG_SCB0_ARBR2, REG_SCB0_ARBW2, 32, { + CONFIG_SCB0_MI2_SLOT0, + CONFIG_SCB0_MI2_SLOT1, + CONFIG_SCB0_MI2_SLOT2, + CONFIG_SCB0_MI2_SLOT3, + CONFIG_SCB0_MI2_SLOT4, + CONFIG_SCB0_MI2_SLOT5, + CONFIG_SCB0_MI2_SLOT6, + CONFIG_SCB0_MI2_SLOT7, + CONFIG_SCB0_MI2_SLOT8, + CONFIG_SCB0_MI2_SLOT9, + CONFIG_SCB0_MI2_SLOT10, + CONFIG_SCB0_MI2_SLOT11, + CONFIG_SCB0_MI2_SLOT12, + CONFIG_SCB0_MI2_SLOT13, + CONFIG_SCB0_MI2_SLOT14, + CONFIG_SCB0_MI2_SLOT15, + CONFIG_SCB0_MI2_SLOT16, + CONFIG_SCB0_MI2_SLOT17, + CONFIG_SCB0_MI2_SLOT18, + CONFIG_SCB0_MI2_SLOT19, + CONFIG_SCB0_MI2_SLOT20, + CONFIG_SCB0_MI2_SLOT21, + CONFIG_SCB0_MI2_SLOT22, + CONFIG_SCB0_MI2_SLOT23, + CONFIG_SCB0_MI2_SLOT24, + CONFIG_SCB0_MI2_SLOT25, + CONFIG_SCB0_MI2_SLOT26, + CONFIG_SCB0_MI2_SLOT27, + CONFIG_SCB0_MI2_SLOT28, + CONFIG_SCB0_MI2_SLOT29, + CONFIG_SCB0_MI2_SLOT30, + CONFIG_SCB0_MI2_SLOT31 + }, + }, +#endif +#ifdef CONFIG_SCB0_MI3 + { REG_SCB0_ARBR3, REG_SCB0_ARBW3, 32, { + CONFIG_SCB0_MI3_SLOT0, + CONFIG_SCB0_MI3_SLOT1, + CONFIG_SCB0_MI3_SLOT2, + CONFIG_SCB0_MI3_SLOT3, + CONFIG_SCB0_MI3_SLOT4, + CONFIG_SCB0_MI3_SLOT5, + CONFIG_SCB0_MI3_SLOT6, + CONFIG_SCB0_MI3_SLOT7, + CONFIG_SCB0_MI3_SLOT8, + CONFIG_SCB0_MI3_SLOT9, + CONFIG_SCB0_MI3_SLOT10, + CONFIG_SCB0_MI3_SLOT11, + CONFIG_SCB0_MI3_SLOT12, + CONFIG_SCB0_MI3_SLOT13, + CONFIG_SCB0_MI3_SLOT14, + CONFIG_SCB0_MI3_SLOT15, + CONFIG_SCB0_MI3_SLOT16, + CONFIG_SCB0_MI3_SLOT17, + 
CONFIG_SCB0_MI3_SLOT18, + CONFIG_SCB0_MI3_SLOT19, + CONFIG_SCB0_MI3_SLOT20, + CONFIG_SCB0_MI3_SLOT21, + CONFIG_SCB0_MI3_SLOT22, + CONFIG_SCB0_MI3_SLOT23, + CONFIG_SCB0_MI3_SLOT24, + CONFIG_SCB0_MI3_SLOT25, + CONFIG_SCB0_MI3_SLOT26, + CONFIG_SCB0_MI3_SLOT27, + CONFIG_SCB0_MI3_SLOT28, + CONFIG_SCB0_MI3_SLOT29, + CONFIG_SCB0_MI3_SLOT30, + CONFIG_SCB0_MI3_SLOT31 + }, + }, +#endif +#ifdef CONFIG_SCB0_MI4 + /* write register belongs to the same SCB0/MI4 pair as ARBR4 */ + { REG_SCB0_ARBR4, REG_SCB0_ARBW4, 32, { + CONFIG_SCB0_MI4_SLOT0, + CONFIG_SCB0_MI4_SLOT1, + CONFIG_SCB0_MI4_SLOT2, + CONFIG_SCB0_MI4_SLOT3, + CONFIG_SCB0_MI4_SLOT4, + CONFIG_SCB0_MI4_SLOT5, + CONFIG_SCB0_MI4_SLOT6, + CONFIG_SCB0_MI4_SLOT7, + CONFIG_SCB0_MI4_SLOT8, + CONFIG_SCB0_MI4_SLOT9, + CONFIG_SCB0_MI4_SLOT10, + CONFIG_SCB0_MI4_SLOT11, + CONFIG_SCB0_MI4_SLOT12, + CONFIG_SCB0_MI4_SLOT13, + CONFIG_SCB0_MI4_SLOT14, + CONFIG_SCB0_MI4_SLOT15, + CONFIG_SCB0_MI4_SLOT16, + CONFIG_SCB0_MI4_SLOT17, + CONFIG_SCB0_MI4_SLOT18, + CONFIG_SCB0_MI4_SLOT19, + CONFIG_SCB0_MI4_SLOT20, + CONFIG_SCB0_MI4_SLOT21, + CONFIG_SCB0_MI4_SLOT22, + CONFIG_SCB0_MI4_SLOT23, + CONFIG_SCB0_MI4_SLOT24, + CONFIG_SCB0_MI4_SLOT25, + CONFIG_SCB0_MI4_SLOT26, + CONFIG_SCB0_MI4_SLOT27, + CONFIG_SCB0_MI4_SLOT28, + CONFIG_SCB0_MI4_SLOT29, + CONFIG_SCB0_MI4_SLOT30, + CONFIG_SCB0_MI4_SLOT31 + }, + }, +#endif +#ifdef CONFIG_SCB0_MI5 + { REG_SCB0_ARBR5, REG_SCB0_ARBW5, 16, { + CONFIG_SCB0_MI5_SLOT0, + CONFIG_SCB0_MI5_SLOT1, + CONFIG_SCB0_MI5_SLOT2, + CONFIG_SCB0_MI5_SLOT3, + CONFIG_SCB0_MI5_SLOT4, + CONFIG_SCB0_MI5_SLOT5, + CONFIG_SCB0_MI5_SLOT6, + CONFIG_SCB0_MI5_SLOT7, + CONFIG_SCB0_MI5_SLOT8, + CONFIG_SCB0_MI5_SLOT9, + CONFIG_SCB0_MI5_SLOT10, + CONFIG_SCB0_MI5_SLOT11, + CONFIG_SCB0_MI5_SLOT12, + CONFIG_SCB0_MI5_SLOT13, + CONFIG_SCB0_MI5_SLOT14, + CONFIG_SCB0_MI5_SLOT15 + }, + }, +#endif +#ifdef CONFIG_SCB1_MI0 + { REG_SCB1_ARBR0, REG_SCB1_ARBW0, 20, { + CONFIG_SCB1_MI0_SLOT0, + CONFIG_SCB1_MI0_SLOT1, + CONFIG_SCB1_MI0_SLOT2, + CONFIG_SCB1_MI0_SLOT3, + CONFIG_SCB1_MI0_SLOT4, + CONFIG_SCB1_MI0_SLOT5, + 
CONFIG_SCB1_MI0_SLOT6, + CONFIG_SCB1_MI0_SLOT7, + CONFIG_SCB1_MI0_SLOT8, + CONFIG_SCB1_MI0_SLOT9, + CONFIG_SCB1_MI0_SLOT10, + CONFIG_SCB1_MI0_SLOT11, + CONFIG_SCB1_MI0_SLOT12, + CONFIG_SCB1_MI0_SLOT13, + CONFIG_SCB1_MI0_SLOT14, + CONFIG_SCB1_MI0_SLOT15, + CONFIG_SCB1_MI0_SLOT16, + CONFIG_SCB1_MI0_SLOT17, + CONFIG_SCB1_MI0_SLOT18, + CONFIG_SCB1_MI0_SLOT19 + }, + }, +#endif +#ifdef CONFIG_SCB2_MI0 + { REG_SCB2_ARBR0, REG_SCB2_ARBW0, 10, { + CONFIG_SCB2_MI0_SLOT0, + CONFIG_SCB2_MI0_SLOT1, + CONFIG_SCB2_MI0_SLOT2, + CONFIG_SCB2_MI0_SLOT3, + CONFIG_SCB2_MI0_SLOT4, + CONFIG_SCB2_MI0_SLOT5, + CONFIG_SCB2_MI0_SLOT6, + CONFIG_SCB2_MI0_SLOT7, + CONFIG_SCB2_MI0_SLOT8, + CONFIG_SCB2_MI0_SLOT9 + }, + }, +#endif +#ifdef CONFIG_SCB3_MI0 + { REG_SCB3_ARBR0, REG_SCB3_ARBW0, 16, { + CONFIG_SCB3_MI0_SLOT0, + CONFIG_SCB3_MI0_SLOT1, + CONFIG_SCB3_MI0_SLOT2, + CONFIG_SCB3_MI0_SLOT3, + CONFIG_SCB3_MI0_SLOT4, + CONFIG_SCB3_MI0_SLOT5, + CONFIG_SCB3_MI0_SLOT6, + CONFIG_SCB3_MI0_SLOT7, + CONFIG_SCB3_MI0_SLOT8, + CONFIG_SCB3_MI0_SLOT9, + CONFIG_SCB3_MI0_SLOT10, + CONFIG_SCB3_MI0_SLOT11, + CONFIG_SCB3_MI0_SLOT12, + CONFIG_SCB3_MI0_SLOT13, + CONFIG_SCB3_MI0_SLOT14, + CONFIG_SCB3_MI0_SLOT15 + }, + }, +#endif +#ifdef CONFIG_SCB4_MI0 + { REG_SCB4_ARBR0, REG_SCB4_ARBW0, 16, { + CONFIG_SCB4_MI0_SLOT0, + CONFIG_SCB4_MI0_SLOT1, + CONFIG_SCB4_MI0_SLOT2, + CONFIG_SCB4_MI0_SLOT3, + CONFIG_SCB4_MI0_SLOT4, + CONFIG_SCB4_MI0_SLOT5, + CONFIG_SCB4_MI0_SLOT6, + CONFIG_SCB4_MI0_SLOT7, + CONFIG_SCB4_MI0_SLOT8, + CONFIG_SCB4_MI0_SLOT9, + CONFIG_SCB4_MI0_SLOT10, + CONFIG_SCB4_MI0_SLOT11, + CONFIG_SCB4_MI0_SLOT12, + CONFIG_SCB4_MI0_SLOT13, + CONFIG_SCB4_MI0_SLOT14, + CONFIG_SCB4_MI0_SLOT15 + }, + }, +#endif +#ifdef CONFIG_SCB5_MI0 + { REG_SCB5_ARBR0, REG_SCB5_ARBW0, 8, { + CONFIG_SCB5_MI0_SLOT0, + CONFIG_SCB5_MI0_SLOT1, + CONFIG_SCB5_MI0_SLOT2, + CONFIG_SCB5_MI0_SLOT3, + CONFIG_SCB5_MI0_SLOT4, + CONFIG_SCB5_MI0_SLOT5, + CONFIG_SCB5_MI0_SLOT6, + CONFIG_SCB5_MI0_SLOT7 + }, + }, +#endif +#ifdef CONFIG_SCB6_MI0 + { 
REG_SCB6_ARBR0, REG_SCB6_ARBW0, 4, { + CONFIG_SCB6_MI0_SLOT0, + CONFIG_SCB6_MI0_SLOT1, + CONFIG_SCB6_MI0_SLOT2, + CONFIG_SCB6_MI0_SLOT3 + }, + }, +#endif +#ifdef CONFIG_SCB7_MI0 + { REG_SCB7_ARBR0, REG_SCB7_ARBW0, 6, { + CONFIG_SCB7_MI0_SLOT0, + CONFIG_SCB7_MI0_SLOT1, + CONFIG_SCB7_MI0_SLOT2, + CONFIG_SCB7_MI0_SLOT3, + CONFIG_SCB7_MI0_SLOT4, + CONFIG_SCB7_MI0_SLOT5 + }, + }, +#endif +#ifdef CONFIG_SCB8_MI0 + { REG_SCB8_ARBR0, REG_SCB8_ARBW0, 8, { + CONFIG_SCB8_MI0_SLOT0, + CONFIG_SCB8_MI0_SLOT1, + CONFIG_SCB8_MI0_SLOT2, + CONFIG_SCB8_MI0_SLOT3, + CONFIG_SCB8_MI0_SLOT4, + CONFIG_SCB8_MI0_SLOT5, + CONFIG_SCB8_MI0_SLOT6, + CONFIG_SCB8_MI0_SLOT7 + }, + }, +#endif +#ifdef CONFIG_SCB9_MI0 + { REG_SCB9_ARBR0, REG_SCB9_ARBW0, 10, { + CONFIG_SCB9_MI0_SLOT0, + CONFIG_SCB9_MI0_SLOT1, + CONFIG_SCB9_MI0_SLOT2, + CONFIG_SCB9_MI0_SLOT3, + CONFIG_SCB9_MI0_SLOT4, + CONFIG_SCB9_MI0_SLOT5, + CONFIG_SCB9_MI0_SLOT6, + CONFIG_SCB9_MI0_SLOT7, + CONFIG_SCB9_MI0_SLOT8, + CONFIG_SCB9_MI0_SLOT9 + }, + }, +#endif + { 0, } +}; diff --git a/arch/blackfin/mach-common/Makefile b/arch/blackfin/mach-common/Makefile index 675466d490d4..f09979204040 100644 --- a/arch/blackfin/mach-common/Makefile +++ b/arch/blackfin/mach-common/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_PM) += pm.o ifneq ($(CONFIG_BF60x),y) obj-$(CONFIG_PM) += dpmc_modes.o endif +obj-$(CONFIG_SCB_PRIORITY) += scb-init.o obj-$(CONFIG_CPU_VOLTAGE) += dpmc.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_BFIN_KERNEL_CLOCK) += clocks-init.o diff --git a/arch/blackfin/mach-common/scb-init.c b/arch/blackfin/mach-common/scb-init.c new file mode 100644 index 000000000000..2cbfb0b5679e --- /dev/null +++ b/arch/blackfin/mach-common/scb-init.c @@ -0,0 +1,53 @@ +/* + * arch/blackfin/mach-common/scb-init.c - reprogram system cross bar priority + * + * Copyright 2012 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <asm/scb.h> + +__attribute__((l1_text)) +inline void scb_mi_write(unsigned long scb_mi_arbw, unsigned int slots, + unsigned char *scb_mi_prio) +{ + unsigned int i; + + for (i = 0; i < slots; ++i) + bfin_write32(scb_mi_arbw, (i << SCB_SLOT_OFFSET) | scb_mi_prio[i]); +} + +__attribute__((l1_text)) +inline void scb_mi_read(unsigned long scb_mi_arbw, unsigned int slots, + unsigned char *scb_mi_prio) +{ + unsigned int i; + + for (i = 0; i < slots; ++i) { + bfin_write32(scb_mi_arbw, (0xFF << SCB_SLOT_OFFSET) | i); + scb_mi_prio[i] = bfin_read32(scb_mi_arbw); + } +} + +__attribute__((l1_text)) +void init_scb(void) +{ + unsigned int i, j; + unsigned char scb_tmp_prio[32]; + + pr_info("Init System Crossbar\n"); + for (i = 0; scb_data[i].scb_mi_arbr > 0; ++i) { + + scb_mi_write(scb_data[i].scb_mi_arbw, scb_data[i].scb_mi_slots, scb_data[i].scb_mi_prio); + + pr_debug("scb priority at 0x%lx:\n", scb_data[i].scb_mi_arbr); + scb_mi_read(scb_data[i].scb_mi_arbw, scb_data[i].scb_mi_slots, scb_tmp_prio); + for (j = 0; j < scb_data[i].scb_mi_slots; ++j) + pr_debug("slot %d = %d\n", j, scb_tmp_prio[j]); + } + +} diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index f6a3648f5ec3..957dd00ea561 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -10,7 +10,6 @@ config C6X select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG - select HAVE_GENERIC_HARDIRQS select HAVE_MEMBLOCK select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index c699d3259872..02380bed189c 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -41,7 +41,6 @@ config CRIS default y select HAVE_IDE select GENERIC_ATOMIC64 - select HAVE_GENERIC_HARDIRQS select HAVE_UID16 select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 73312ab6c696..1790f22e71a2 100644 --- a/arch/cris/mm/fault.c +++ 
b/arch/cris/mm/fault.c @@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, struct vm_area_struct * vma; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; D(printk(KERN_DEBUG "Page fault for %lX on %X at %lX, prot %d write %d\n", @@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -155,6 +156,7 @@ retry: } else if (writeaccess == 1) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 4b6628ea381e..34aa19352dc1 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -5,7 +5,6 @@ config FRV select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS select HAVE_UID16 - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 331c1e2cfb67..9a66372fc7c7 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear struct vm_area_struct *vma; struct mm_struct *mm; unsigned long _pme, lrai, lrad, fixup; + unsigned long flags = 0; siginfo_t info; pgd_t *pge; pud_t *pue; pte_t *pte; - int write; int fault; #if 0 @@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear if (in_atomic() || !mm) goto no_context; + if (user_mode(__frame)) + flags |= FAULT_FLAG_USER; + down_read(&mm->mmap_sem); vma = find_vma(mm, ear0); @@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear */ good_area: 
info.si_code = SEGV_ACCERR; - write = 0; switch (esr0 & ESR0_ATXC) { default: /* handle write to write protected page */ @@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; - write = 1; + flags |= FAULT_FLAG_WRITE; break; /* handle read from protected page */ @@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, ear0, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3d6759ee382f..24b1dc2564f1 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -2,7 +2,6 @@ config H8300 bool default y select HAVE_IDE - select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select HAVE_UID16 select VIRT_TO_BUS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 77d442ab28c8..99041b07e610 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -15,7 +15,6 @@ config HEXAGON # select GENERIC_PENDING_IRQ if SMP select GENERIC_ATOMIC64 select HAVE_PERF_EVENTS - select HAVE_GENERIC_HARDIRQS # GENERIC_ALLOCATOR is used by dma_alloc_coherent() select GENERIC_ALLOCATOR select GENERIC_IRQ_SHOW diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 1bd276dbec7d..8704c9320032 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) int si_code = SEGV_MAPERR; int fault; const struct exception_table_entry *fixup; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (cause > 0 ? 
FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * If we're in an interrupt or have no user context, @@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) local_irq_enable(); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -96,6 +97,7 @@ good_area: case FLT_STORE: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; break; } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a86a56d9e73f..7740ab10a171 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -21,7 +21,6 @@ config IA64 select HAVE_KVM select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG - select HAVE_GENERIC_HARDIRQS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 6cf0341f978e..7225dad87094 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0); - /* mmap_sem is performance critical.... 
*/ prefetchw(&mm->mmap_sem); @@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re if (notify_page_fault(regs, TRAP_BRKPT)) return; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (mask & VM_WRITE) + flags |= FAULT_FLAG_WRITE; retry: down_read(&mm->mmap_sem); diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 29a7ef4e448b..75661fbf4529 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -9,7 +9,6 @@ config M32R select HAVE_KERNEL_LZMA select ARCH_WANT_IPC_PARSE_VERSION select HAVE_DEBUG_BUGVERBOSE - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index 3cdfa9c1d091..e9c6a8014bd6 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, struct mm_struct *mm; struct vm_area_struct * vma; unsigned long page, addr; - int write; + unsigned long flags = 0; int fault; siginfo_t info; @@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (error_code & ACE_USERMODE) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. 
Unfortunately, in the case of an @@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, */ good_area: info.si_code = SEGV_ACCERR; - write = 0; switch (error_code & (ACE_WRITE|ACE_PROTECTION)) { default: /* 3: write, present */ /* fall through */ case ACE_WRITE: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; - write++; + flags |= FAULT_FLAG_WRITE; break; case ACE_PROTECTION: /* read, present */ case 0: /* read, not present */ @@ -194,7 +196,7 @@ good_area: */ addr = (address & PAGE_MASK); set_thread_fault_code(error_code); - fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, addr, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index c3cda41af801..311a300d48cc 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,7 +4,6 @@ config M68K select HAVE_IDE select HAVE_AOUT if MMU select HAVE_DEBUG_BUGVERBOSE - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 select HAVE_UID16 diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index a563727806bf..eb1d61f68725 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index cfd831c29824..36368eb07e13 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -13,7 +13,6 @@ config METAG select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_GENERIC_HARDIRQS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 8fddf46e6c62..332680e5ebf2 100644 --- a/arch/metag/mm/fault.c 
+++ b/arch/metag/mm/fault.c @@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma, *prev_vma; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write_access ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; @@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); @@ -121,6 +122,7 @@ good_area: if (write_access) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3f6659cbc969..b82f82b74319 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -18,7 +18,6 @@ config MICROBLAZE select ARCH_WANT_IPC_PARSE_VERSION select HAVE_DEBUG_KMEMLEAK select IRQ_DOMAIN - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 731f739d17a1..fa4cf52aa7a6 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, int code = SEGV_MAPERR; int is_write = error_code & ESR_S; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (is_write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; regs->ear = address; regs->esr = error_code; @@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -199,6 +201,7 @@ good_area: if (unlikely(is_write)) { if (unlikely(!(vma->vm_flags & VM_WRITE))) goto bad_area; + flags |= FAULT_FLAG_WRITE; /* a read */ } else { /* protection fault */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 71f15e73bc89..f75ab4a2f246 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -25,7 +25,6 @@ config MIPS select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP @@ -95,6 +94,7 @@ config ATH79 select CSRC_R4K select DMA_NONCOHERENT select HAVE_CLK + select CLKDEV_LOOKUP select IRQ_CPU select MIPS_MACHINE select SYS_HAS_CPU_MIPS32_R2 @@ -131,7 +131,6 @@ config BCM63XX select IRQ_CPU select SYS_HAS_CPU_MIPS32_R1 select SYS_HAS_CPU_BMIPS4350 if !BCM63XX_CPU_6338 && !BCM63XX_CPU_6345 && !BCM63XX_CPU_6348 - select NR_CPUS_DEFAULT_2 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK @@ -445,6 +444,8 @@ config RALINK select SYS_HAS_EARLY_PRINTK select HAVE_MACH_CLKDEV select CLKDEV_LOOKUP + select ARCH_HAS_RESET_CONTROLLER + select RESET_CONTROLLER config SGI_IP22 bool "SGI IP22 (Indy/Indigo2)" @@ -609,7 +610,6 @@ config SIBYTE_SWARM select BOOT_ELF32 select DMA_COHERENT select HAVE_PATA_PLATFORM - select NR_CPUS_DEFAULT_2 select SIBYTE_SB1250 select SWAP_IO_SPACE select SYS_HAS_CPU_SB1 @@ -623,7 +623,6 @@ config SIBYTE_LITTLESUR select BOOT_ELF32 select DMA_COHERENT select HAVE_PATA_PLATFORM - select NR_CPUS_DEFAULT_2 select SIBYTE_SB1250 select SWAP_IO_SPACE select SYS_HAS_CPU_SB1 @@ -635,7 +634,6 @@ config SIBYTE_SENTOSA bool "Sibyte BCM91250E-Sentosa" select BOOT_ELF32 select DMA_COHERENT - select NR_CPUS_DEFAULT_2 select SIBYTE_SB1250 select SWAP_IO_SPACE select SYS_HAS_CPU_SB1 @@ -731,6 +729,7 @@ config CAVIUM_OCTEON_SOC select 
USB_ARCH_HAS_OHCI select USB_ARCH_HAS_EHCI select HOLES_IN_ZONE + select ARCH_REQUIRE_GPIOLIB help This option supports all of the Octeon reference boards from Cavium Networks. It builds a kernel that dynamically determines the Octeon @@ -1860,7 +1859,6 @@ config MIPS_MT_SMP select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT - select NR_CPUS_DEFAULT_2 select SMP select SYS_SUPPORTS_SCHED_SMT if SMP select SYS_SUPPORTS_SMP @@ -2171,12 +2169,6 @@ config SYS_SUPPORTS_MIPS_CMP config SYS_SUPPORTS_SMP bool -config NR_CPUS_DEFAULT_1 - bool - -config NR_CPUS_DEFAULT_2 - bool - config NR_CPUS_DEFAULT_4 bool @@ -2194,10 +2186,8 @@ config NR_CPUS_DEFAULT_64 config NR_CPUS int "Maximum number of CPUs (2-64)" - range 1 64 if NR_CPUS_DEFAULT_1 + range 2 64 depends on SMP - default "1" if NR_CPUS_DEFAULT_1 - default "2" if NR_CPUS_DEFAULT_2 default "4" if NR_CPUS_DEFAULT_4 default "8" if NR_CPUS_DEFAULT_8 default "16" if NR_CPUS_DEFAULT_16 diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 37f9ef324f2f..ca8f8340d75f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -194,6 +194,8 @@ include $(srctree)/arch/mips/Kbuild.platforms ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif +entry-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ + | grep "\bkernel_entry\b" | cut -f1 -d \ ) cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ @@ -225,6 +227,9 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) +bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + VMLINUX_ENTRY_ADDRESS=$(entry-y) + LDFLAGS += -m $(ld-emul) ifdef CONFIG_CC_STACKPROTECTOR @@ -254,9 +259,25 @@ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ # suspend and hibernation support drivers-$(CONFIG_PM) += arch/mips/power/ +# boot image targets (arch/mips/boot/) +boot-y := vmlinux.bin +boot-y += vmlinux.ecoff +boot-y += vmlinux.srec 
+ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0) +boot-y += uImage +boot-y += uImage.gz +endif + +# compressed boot image targets (arch/mips/boot/compressed/) +bootz-y := vmlinuz +bootz-y += vmlinuz.bin +bootz-y += vmlinuz.ecoff +bootz-y += vmlinuz.srec + ifdef CONFIG_LASAT rom.bin rom.sw: vmlinux - $(Q)$(MAKE) $(build)=arch/mips/lasat/image $@ + $(Q)$(MAKE) $(build)=arch/mips/lasat/image \ + $(bootvars-y) $@ endif # @@ -267,9 +288,6 @@ endif vmlinux.32: vmlinux $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@ - -#obj-$(CONFIG_KPROBES) += kprobes.o - # # The 64-bit ELF tools are pretty broken so at this time we generate 64-bit # ELF files from 32-bit files by conversion. @@ -280,13 +298,14 @@ vmlinux.64: vmlinux all: $(all-y) # boot -vmlinux.bin vmlinux.ecoff vmlinux.srec: $(vmlinux-32) FORCE - $(Q)$(MAKE) $(build)=arch/mips/boot VMLINUX=$(vmlinux-32) arch/mips/boot/$@ +$(boot-y): $(vmlinux-32) FORCE + $(Q)$(MAKE) $(build)=arch/mips/boot VMLINUX=$(vmlinux-32) \ + $(bootvars-y) arch/mips/boot/$@ # boot/compressed -vmlinuz vmlinuz.bin vmlinuz.ecoff vmlinuz.srec: $(vmlinux-32) FORCE +$(bootz-y): $(vmlinux-32) FORCE $(Q)$(MAKE) $(build)=arch/mips/boot/compressed \ - VMLINUX_LOAD_ADDRESS=$(load-y) 32bit-bfd=$(32bit-bfd) $@ + $(bootvars-y) 32bit-bfd=$(32bit-bfd) $@ CLEAN_FILES += vmlinux.32 vmlinux.64 @@ -323,6 +342,8 @@ define archhelp echo ' vmlinuz.ecoff - ECOFF zboot image' echo ' vmlinuz.bin - Raw binary zboot image' echo ' vmlinuz.srec - SREC zboot image' + echo ' uImage - U-Boot image' + echo ' uImage.gz - U-Boot image (gzip)' echo echo ' These will be default as appropriate for a configured platform.' 
endef diff --git a/arch/mips/alchemy/common/usb.c b/arch/mips/alchemy/common/usb.c index fcc695626117..2adc7edda49c 100644 --- a/arch/mips/alchemy/common/usb.c +++ b/arch/mips/alchemy/common/usb.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/syscore_ops.h> +#include <asm/cpu.h> #include <asm/mach-au1x00/au1000.h> /* control register offsets */ @@ -358,7 +359,7 @@ static inline int au1200_coherency_bug(void) { #if defined(CONFIG_DMA_COHERENT) /* Au1200 AB USB does not support coherent memory */ - if (!(read_c0_prid() & 0xff)) { + if (!(read_c0_prid() & PRID_REV_MASK)) { printk(KERN_INFO "Au1200 USB: this is chip revision AB !!\n"); printk(KERN_INFO "Au1200 USB: update your board or re-configure" " the kernel\n"); diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 765ef30e3e1c..26479f437675 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/err.h> #include <linux/clk.h> +#include <linux/clkdev.h> #include <asm/div64.h> @@ -31,92 +32,132 @@ struct clk { unsigned long rate; }; -static struct clk ath79_ref_clk; -static struct clk ath79_cpu_clk; -static struct clk ath79_ddr_clk; -static struct clk ath79_ahb_clk; -static struct clk ath79_wdt_clk; -static struct clk ath79_uart_clk; +static void __init ath79_add_sys_clkdev(const char *id, unsigned long rate) +{ + struct clk *clk; + int err; + + clk = kzalloc(sizeof(*clk), GFP_KERNEL); + if (!clk) + panic("failed to allocate %s clock structure", id); + + clk->rate = rate; + + err = clk_register_clkdev(clk, id, NULL); + if (err) + panic("unable to register %s clock device", id); +} static void __init ar71xx_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 pll; u32 freq; u32 div; - ath79_ref_clk.rate = AR71XX_BASE_FREQ; + ref_rate = AR71XX_BASE_FREQ; pll = ath79_pll_rr(AR71XX_PLL_REG_CPU_CONFIG); div = ((pll >> 
AR71XX_PLL_DIV_SHIFT) & AR71XX_PLL_DIV_MASK) + 1; - freq = div * ath79_ref_clk.rate; + freq = div * ref_rate; div = ((pll >> AR71XX_CPU_DIV_SHIFT) & AR71XX_CPU_DIV_MASK) + 1; - ath79_cpu_clk.rate = freq / div; + cpu_rate = freq / div; div = ((pll >> AR71XX_DDR_DIV_SHIFT) & AR71XX_DDR_DIV_MASK) + 1; - ath79_ddr_clk.rate = freq / div; + ddr_rate = freq / div; div = (((pll >> AR71XX_AHB_DIV_SHIFT) & AR71XX_AHB_DIV_MASK) + 1) * 2; - ath79_ahb_clk.rate = ath79_cpu_clk.rate / div; + ahb_rate = cpu_rate / div; + + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); - ath79_wdt_clk.rate = ath79_ahb_clk.rate; - ath79_uart_clk.rate = ath79_ahb_clk.rate; + clk_add_alias("wdt", NULL, "ahb", NULL); + clk_add_alias("uart", NULL, "ahb", NULL); } static void __init ar724x_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 pll; u32 freq; u32 div; - ath79_ref_clk.rate = AR724X_BASE_FREQ; + ref_rate = AR724X_BASE_FREQ; pll = ath79_pll_rr(AR724X_PLL_REG_CPU_CONFIG); div = ((pll >> AR724X_PLL_DIV_SHIFT) & AR724X_PLL_DIV_MASK); - freq = div * ath79_ref_clk.rate; + freq = div * ref_rate; div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK); freq *= div; - ath79_cpu_clk.rate = freq; + cpu_rate = freq; div = ((pll >> AR724X_DDR_DIV_SHIFT) & AR724X_DDR_DIV_MASK) + 1; - ath79_ddr_clk.rate = freq / div; + ddr_rate = freq / div; div = (((pll >> AR724X_AHB_DIV_SHIFT) & AR724X_AHB_DIV_MASK) + 1) * 2; - ath79_ahb_clk.rate = ath79_cpu_clk.rate / div; + ahb_rate = cpu_rate / div; + + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); - ath79_wdt_clk.rate = ath79_ahb_clk.rate; - ath79_uart_clk.rate = ath79_ahb_clk.rate; + clk_add_alias("wdt", NULL, "ahb", NULL); + clk_add_alias("uart", NULL, 
"ahb", NULL); } static void __init ar913x_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 pll; u32 freq; u32 div; - ath79_ref_clk.rate = AR913X_BASE_FREQ; + ref_rate = AR913X_BASE_FREQ; pll = ath79_pll_rr(AR913X_PLL_REG_CPU_CONFIG); div = ((pll >> AR913X_PLL_DIV_SHIFT) & AR913X_PLL_DIV_MASK); - freq = div * ath79_ref_clk.rate; + freq = div * ref_rate; - ath79_cpu_clk.rate = freq; + cpu_rate = freq; div = ((pll >> AR913X_DDR_DIV_SHIFT) & AR913X_DDR_DIV_MASK) + 1; - ath79_ddr_clk.rate = freq / div; + ddr_rate = freq / div; div = (((pll >> AR913X_AHB_DIV_SHIFT) & AR913X_AHB_DIV_MASK) + 1) * 2; - ath79_ahb_clk.rate = ath79_cpu_clk.rate / div; + ahb_rate = cpu_rate / div; - ath79_wdt_clk.rate = ath79_ahb_clk.rate; - ath79_uart_clk.rate = ath79_ahb_clk.rate; + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); + + clk_add_alias("wdt", NULL, "ahb", NULL); + clk_add_alias("uart", NULL, "ahb", NULL); } static void __init ar933x_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 clock_ctrl; u32 cpu_config; u32 freq; @@ -124,21 +165,21 @@ static void __init ar933x_clocks_init(void) t = ath79_reset_rr(AR933X_RESET_REG_BOOTSTRAP); if (t & AR933X_BOOTSTRAP_REF_CLK_40) - ath79_ref_clk.rate = (40 * 1000 * 1000); + ref_rate = (40 * 1000 * 1000); else - ath79_ref_clk.rate = (25 * 1000 * 1000); + ref_rate = (25 * 1000 * 1000); clock_ctrl = ath79_pll_rr(AR933X_PLL_CLOCK_CTRL_REG); if (clock_ctrl & AR933X_PLL_CLOCK_CTRL_BYPASS) { - ath79_cpu_clk.rate = ath79_ref_clk.rate; - ath79_ahb_clk.rate = ath79_ref_clk.rate; - ath79_ddr_clk.rate = ath79_ref_clk.rate; + cpu_rate = ref_rate; + ahb_rate = ref_rate; + ddr_rate = ref_rate; } else { cpu_config = ath79_pll_rr(AR933X_PLL_CPU_CONFIG_REG); t = (cpu_config >> 
AR933X_PLL_CPU_CONFIG_REFDIV_SHIFT) & AR933X_PLL_CPU_CONFIG_REFDIV_MASK; - freq = ath79_ref_clk.rate / t; + freq = ref_rate / t; t = (cpu_config >> AR933X_PLL_CPU_CONFIG_NINT_SHIFT) & AR933X_PLL_CPU_CONFIG_NINT_MASK; @@ -153,19 +194,24 @@ static void __init ar933x_clocks_init(void) t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_CPU_DIV_SHIFT) & AR933X_PLL_CLOCK_CTRL_CPU_DIV_MASK) + 1; - ath79_cpu_clk.rate = freq / t; + cpu_rate = freq / t; t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_DDR_DIV_SHIFT) & AR933X_PLL_CLOCK_CTRL_DDR_DIV_MASK) + 1; - ath79_ddr_clk.rate = freq / t; + ddr_rate = freq / t; t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_AHB_DIV_SHIFT) & AR933X_PLL_CLOCK_CTRL_AHB_DIV_MASK) + 1; - ath79_ahb_clk.rate = freq / t; + ahb_rate = freq / t; } - ath79_wdt_clk.rate = ath79_ref_clk.rate; - ath79_uart_clk.rate = ath79_ref_clk.rate; + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); + + clk_add_alias("wdt", NULL, "ahb", NULL); + clk_add_alias("uart", NULL, "ref", NULL); } static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac, @@ -174,12 +220,12 @@ static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac, u64 t; u32 ret; - t = ath79_ref_clk.rate; + t = ref; t *= nint; do_div(t, ref_div); ret = t; - t = ath79_ref_clk.rate; + t = ref; t *= nfrac; do_div(t, ref_div * frac); ret += t; @@ -190,6 +236,10 @@ static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac, static void __init ar934x_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 pll, out_div, ref_div, nint, nfrac, frac, clk_ctrl, postdiv; u32 cpu_pll, ddr_pll; u32 bootstrap; @@ -199,9 +249,9 @@ static void __init ar934x_clocks_init(void) bootstrap = ath79_reset_rr(AR934X_RESET_REG_BOOTSTRAP); if (bootstrap & AR934X_BOOTSTRAP_REF_CLK_40) - ath79_ref_clk.rate = 
40 * 1000 * 1000; + ref_rate = 40 * 1000 * 1000; else - ath79_ref_clk.rate = 25 * 1000 * 1000; + ref_rate = 25 * 1000 * 1000; pll = __raw_readl(dpll_base + AR934X_SRIF_CPU_DPLL2_REG); if (pll & AR934X_SRIF_DPLL2_LOCAL_PLL) { @@ -227,7 +277,7 @@ static void __init ar934x_clocks_init(void) frac = 1 << 6; } - cpu_pll = ar934x_get_pll_freq(ath79_ref_clk.rate, ref_div, nint, + cpu_pll = ar934x_get_pll_freq(ref_rate, ref_div, nint, nfrac, frac, out_div); pll = __raw_readl(dpll_base + AR934X_SRIF_DDR_DPLL2_REG); @@ -254,7 +304,7 @@ static void __init ar934x_clocks_init(void) frac = 1 << 10; } - ddr_pll = ar934x_get_pll_freq(ath79_ref_clk.rate, ref_div, nint, + ddr_pll = ar934x_get_pll_freq(ref_rate, ref_div, nint, nfrac, frac, out_div); clk_ctrl = ath79_pll_rr(AR934X_PLL_CPU_DDR_CLK_CTRL_REG); @@ -263,49 +313,58 @@ static void __init ar934x_clocks_init(void) AR934X_PLL_CPU_DDR_CLK_CTRL_CPU_POST_DIV_MASK; if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_CPU_PLL_BYPASS) - ath79_cpu_clk.rate = ath79_ref_clk.rate; + cpu_rate = ref_rate; else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_CPUCLK_FROM_CPUPLL) - ath79_cpu_clk.rate = cpu_pll / (postdiv + 1); + cpu_rate = cpu_pll / (postdiv + 1); else - ath79_cpu_clk.rate = ddr_pll / (postdiv + 1); + cpu_rate = ddr_pll / (postdiv + 1); postdiv = (clk_ctrl >> AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_POST_DIV_SHIFT) & AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_POST_DIV_MASK; if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_PLL_BYPASS) - ath79_ddr_clk.rate = ath79_ref_clk.rate; + ddr_rate = ref_rate; else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_DDRCLK_FROM_DDRPLL) - ath79_ddr_clk.rate = ddr_pll / (postdiv + 1); + ddr_rate = ddr_pll / (postdiv + 1); else - ath79_ddr_clk.rate = cpu_pll / (postdiv + 1); + ddr_rate = cpu_pll / (postdiv + 1); postdiv = (clk_ctrl >> AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_POST_DIV_SHIFT) & AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_POST_DIV_MASK; if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_PLL_BYPASS) - ath79_ahb_clk.rate = ath79_ref_clk.rate; + 
ahb_rate = ref_rate; else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_AHBCLK_FROM_DDRPLL) - ath79_ahb_clk.rate = ddr_pll / (postdiv + 1); + ahb_rate = ddr_pll / (postdiv + 1); else - ath79_ahb_clk.rate = cpu_pll / (postdiv + 1); + ahb_rate = cpu_pll / (postdiv + 1); + + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); - ath79_wdt_clk.rate = ath79_ref_clk.rate; - ath79_uart_clk.rate = ath79_ref_clk.rate; + clk_add_alias("wdt", NULL, "ref", NULL); + clk_add_alias("uart", NULL, "ref", NULL); iounmap(dpll_base); } static void __init qca955x_clocks_init(void) { + unsigned long ref_rate; + unsigned long cpu_rate; + unsigned long ddr_rate; + unsigned long ahb_rate; u32 pll, out_div, ref_div, nint, frac, clk_ctrl, postdiv; u32 cpu_pll, ddr_pll; u32 bootstrap; bootstrap = ath79_reset_rr(QCA955X_RESET_REG_BOOTSTRAP); if (bootstrap & QCA955X_BOOTSTRAP_REF_CLK_40) - ath79_ref_clk.rate = 40 * 1000 * 1000; + ref_rate = 40 * 1000 * 1000; else - ath79_ref_clk.rate = 25 * 1000 * 1000; + ref_rate = 25 * 1000 * 1000; pll = ath79_pll_rr(QCA955X_PLL_CPU_CONFIG_REG); out_div = (pll >> QCA955X_PLL_CPU_CONFIG_OUTDIV_SHIFT) & @@ -317,8 +376,8 @@ static void __init qca955x_clocks_init(void) frac = (pll >> QCA955X_PLL_CPU_CONFIG_NFRAC_SHIFT) & QCA955X_PLL_CPU_CONFIG_NFRAC_MASK; - cpu_pll = nint * ath79_ref_clk.rate / ref_div; - cpu_pll += frac * ath79_ref_clk.rate / (ref_div * (1 << 6)); + cpu_pll = nint * ref_rate / ref_div; + cpu_pll += frac * ref_rate / (ref_div * (1 << 6)); cpu_pll /= (1 << out_div); pll = ath79_pll_rr(QCA955X_PLL_DDR_CONFIG_REG); @@ -331,8 +390,8 @@ static void __init qca955x_clocks_init(void) frac = (pll >> QCA955X_PLL_DDR_CONFIG_NFRAC_SHIFT) & QCA955X_PLL_DDR_CONFIG_NFRAC_MASK; - ddr_pll = nint * ath79_ref_clk.rate / ref_div; - ddr_pll += frac * ath79_ref_clk.rate / (ref_div * (1 << 10)); + ddr_pll = nint * ref_rate / ref_div; + ddr_pll += frac * ref_rate / 
(ref_div * (1 << 10)); ddr_pll /= (1 << out_div); clk_ctrl = ath79_pll_rr(QCA955X_PLL_CLK_CTRL_REG); @@ -341,34 +400,39 @@ static void __init qca955x_clocks_init(void) QCA955X_PLL_CLK_CTRL_CPU_POST_DIV_MASK; if (clk_ctrl & QCA955X_PLL_CLK_CTRL_CPU_PLL_BYPASS) - ath79_cpu_clk.rate = ath79_ref_clk.rate; + cpu_rate = ref_rate; else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL) - ath79_cpu_clk.rate = ddr_pll / (postdiv + 1); + cpu_rate = ddr_pll / (postdiv + 1); else - ath79_cpu_clk.rate = cpu_pll / (postdiv + 1); + cpu_rate = cpu_pll / (postdiv + 1); postdiv = (clk_ctrl >> QCA955X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT) & QCA955X_PLL_CLK_CTRL_DDR_POST_DIV_MASK; if (clk_ctrl & QCA955X_PLL_CLK_CTRL_DDR_PLL_BYPASS) - ath79_ddr_clk.rate = ath79_ref_clk.rate; + ddr_rate = ref_rate; else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL) - ath79_ddr_clk.rate = cpu_pll / (postdiv + 1); + ddr_rate = cpu_pll / (postdiv + 1); else - ath79_ddr_clk.rate = ddr_pll / (postdiv + 1); + ddr_rate = ddr_pll / (postdiv + 1); postdiv = (clk_ctrl >> QCA955X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT) & QCA955X_PLL_CLK_CTRL_AHB_POST_DIV_MASK; if (clk_ctrl & QCA955X_PLL_CLK_CTRL_AHB_PLL_BYPASS) - ath79_ahb_clk.rate = ath79_ref_clk.rate; + ahb_rate = ref_rate; else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL) - ath79_ahb_clk.rate = ddr_pll / (postdiv + 1); + ahb_rate = ddr_pll / (postdiv + 1); else - ath79_ahb_clk.rate = cpu_pll / (postdiv + 1); + ahb_rate = cpu_pll / (postdiv + 1); - ath79_wdt_clk.rate = ath79_ref_clk.rate; - ath79_uart_clk.rate = ath79_ref_clk.rate; + ath79_add_sys_clkdev("ref", ref_rate); + ath79_add_sys_clkdev("cpu", cpu_rate); + ath79_add_sys_clkdev("ddr", ddr_rate); + ath79_add_sys_clkdev("ahb", ahb_rate); + + clk_add_alias("wdt", NULL, "ref", NULL); + clk_add_alias("uart", NULL, "ref", NULL); } void __init ath79_clocks_init(void) @@ -387,46 +451,27 @@ void __init ath79_clocks_init(void) qca955x_clocks_init(); else BUG(); - - pr_info("Clocks: CPU:%lu.%03luMHz, 
DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, " - "Ref:%lu.%03luMHz", - ath79_cpu_clk.rate / 1000000, - (ath79_cpu_clk.rate / 1000) % 1000, - ath79_ddr_clk.rate / 1000000, - (ath79_ddr_clk.rate / 1000) % 1000, - ath79_ahb_clk.rate / 1000000, - (ath79_ahb_clk.rate / 1000) % 1000, - ath79_ref_clk.rate / 1000000, - (ath79_ref_clk.rate / 1000) % 1000); } -/* - * Linux clock API - */ -struct clk *clk_get(struct device *dev, const char *id) +unsigned long __init +ath79_get_sys_clk_rate(const char *id) { - if (!strcmp(id, "ref")) - return &ath79_ref_clk; - - if (!strcmp(id, "cpu")) - return &ath79_cpu_clk; - - if (!strcmp(id, "ddr")) - return &ath79_ddr_clk; - - if (!strcmp(id, "ahb")) - return &ath79_ahb_clk; + struct clk *clk; + unsigned long rate; - if (!strcmp(id, "wdt")) - return &ath79_wdt_clk; + clk = clk_get(NULL, id); + if (IS_ERR(clk)) + panic("unable to get %s clock, err=%d", id, (int) PTR_ERR(clk)); - if (!strcmp(id, "uart")) - return &ath79_uart_clk; + rate = clk_get_rate(clk); + clk_put(clk); - return ERR_PTR(-ENOENT); + return rate; } -EXPORT_SYMBOL(clk_get); +/* + * Linux clock API + */ int clk_enable(struct clk *clk) { return 0; @@ -443,8 +488,3 @@ unsigned long clk_get_rate(struct clk *clk) return clk->rate; } EXPORT_SYMBOL(clk_get_rate); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_put); diff --git a/arch/mips/ath79/common.h b/arch/mips/ath79/common.h index 561906c2345e..648d2dafbc56 100644 --- a/arch/mips/ath79/common.h +++ b/arch/mips/ath79/common.h @@ -21,6 +21,8 @@ #define ATH79_MEM_SIZE_MAX (128 * 1024 * 1024) void ath79_clocks_init(void); +unsigned long ath79_get_sys_clk_rate(const char *id); + void ath79_ddr_wb_flush(unsigned int reg); void ath79_gpio_function_enable(u32 mask); diff --git a/arch/mips/ath79/dev-common.c b/arch/mips/ath79/dev-common.c index a3a2741d0688..c3b04c929f29 100644 --- a/arch/mips/ath79/dev-common.c +++ b/arch/mips/ath79/dev-common.c @@ -81,21 +81,19 @@ static struct platform_device ar933x_uart_device = { void __init 
ath79_register_uart(void) { - struct clk *clk; + unsigned long uart_clk_rate; - clk = clk_get(NULL, "uart"); - if (IS_ERR(clk)) - panic("unable to get UART clock, err=%ld", PTR_ERR(clk)); + uart_clk_rate = ath79_get_sys_clk_rate("uart"); if (soc_is_ar71xx() || soc_is_ar724x() || soc_is_ar913x() || soc_is_ar934x() || soc_is_qca955x()) { - ath79_uart_data[0].uartclk = clk_get_rate(clk); + ath79_uart_data[0].uartclk = uart_clk_rate; platform_device_register(&ath79_uart_device); } else if (soc_is_ar933x()) { - ar933x_uart_data.uartclk = clk_get_rate(clk); + ar933x_uart_data.uartclk = uart_clk_rate; platform_device_register(&ar933x_uart_device); } else { BUG(); diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 80f4ecd42b0d..64807a4809d0 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -200,7 +200,6 @@ void __init plat_mem_setup(void) ath79_detect_sys_type(); detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX); - ath79_clocks_init(); _machine_restart = ath79_restart; _machine_halt = ath79_halt; @@ -209,13 +208,25 @@ void __init plat_mem_setup(void) void __init plat_time_init(void) { - struct clk *clk; + unsigned long cpu_clk_rate; + unsigned long ahb_clk_rate; + unsigned long ddr_clk_rate; + unsigned long ref_clk_rate; + + ath79_clocks_init(); + + cpu_clk_rate = ath79_get_sys_clk_rate("cpu"); + ahb_clk_rate = ath79_get_sys_clk_rate("ahb"); + ddr_clk_rate = ath79_get_sys_clk_rate("ddr"); + ref_clk_rate = ath79_get_sys_clk_rate("ref"); - clk = clk_get(NULL, "cpu"); - if (IS_ERR(clk)) - panic("unable to get CPU clock, err=%ld", PTR_ERR(clk)); + pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz", + cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000, + ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000, + ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000, + ref_clk_rate / 1000000, (ref_clk_rate / 1000) % 1000); - mips_hpt_frequency = clk_get_rate(clk) / 2; + mips_hpt_frequency = 
cpu_clk_rate / 2; } static int __init ath79_setup(void) diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index 7e17374a9ae8..b713cd64b087 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -306,14 +306,14 @@ void __init bcm63xx_cpu_init(void) switch (c->cputype) { case CPU_BMIPS3300: - if ((read_c0_prid() & 0xff00) != PRID_IMP_BMIPS3300_ALT) + if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; /* fall-through */ case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; case CPU_BMIPS4350: - switch ((read_c0_prid() & 0xff)) { + switch ((read_c0_prid() & PRID_REV_MASK)) { case 0x04: chipid_reg = BCM_3368_PERF_BASE; break; diff --git a/arch/mips/boot/.gitignore b/arch/mips/boot/.gitignore index f210b09ececc..a73d6e2c4f64 100644 --- a/arch/mips/boot/.gitignore +++ b/arch/mips/boot/.gitignore @@ -4,3 +4,4 @@ vmlinux.* zImage zImage.tmp calc_vmlinuz_load_addr +uImage diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 851261e9fdc0..1466c0026093 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -40,3 +40,18 @@ quiet_cmd_srec = OBJCOPY $@ cmd_srec = $(OBJCOPY) -S -O srec $(strip-flags) $(VMLINUX) $@ $(obj)/vmlinux.srec: $(VMLINUX) FORCE $(call if_changed,srec) + +UIMAGE_LOADADDR = $(VMLINUX_LOAD_ADDRESS) +UIMAGE_ENTRYADDR = $(VMLINUX_ENTRY_ADDRESS) + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +targets += uImage.gz +$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE + $(call if_changed,uimage,gzip) + +targets += uImage +$(obj)/uImage: $(obj)/uImage.gz FORCE + @ln -sf $(notdir $<) $@ + @echo ' Image $@ is ready' diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index bb1dbf4abb9d..0048c0897896 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -25,7 +25,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) $(KBUILD_CFLAGS) -D__KERNEL__ \ KBUILD_AFLAGS := 
$(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ - -DKERNEL_ENTRY=0x$(shell $(NM) $(objtree)/$(KBUILD_IMAGE) 2>/dev/null | grep " kernel_entry" | cut -f1 -d \ ) + -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) targets := head.o decompress.o dbg.o uart-16550.o uart-alchemy.o diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings new file mode 120000 index 000000000000..08c00e4972fa --- /dev/null +++ b/arch/mips/boot/dts/include/dt-bindings @@ -0,0 +1 @@ +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c index 02193953eb9e..b752c4ed0b79 100644 --- a/arch/mips/cavium-octeon/csrc-octeon.c +++ b/arch/mips/cavium-octeon/csrc-octeon.c @@ -12,6 +12,7 @@ #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/time.h> #include <asm/octeon/octeon.h> diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 9d36774bded1..25fbfae06c1f 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1776,7 +1776,7 @@ asmlinkage void plat_irq_dispatch(void) #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(void) +void octeon_fixup_irqs(void) { irq_cpu_offline(); } diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 48b08eb9d9e4..b212ae12e5ac 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -8,6 +8,7 @@ * written by Ralf Baechle <ralf@linux-mips.org> */ #include <linux/compiler.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/console.h> @@ -1139,3 +1140,30 @@ static int __init edac_devinit(void) return err; } device_initcall(edac_devinit); + +static void __initdata *octeon_dummy_iospace; + +static int __init octeon_no_pci_init(void) +{ + /* + * Initially assume there is no PCI. The PCI/PCIe platform code will + * later re-initialize these to correct values if they are present. + */ + octeon_dummy_iospace = vzalloc(IO_SPACE_LIMIT); + set_io_port_base((unsigned long)octeon_dummy_iospace); + ioport_resource.start = MAX_RESOURCE; + ioport_resource.end = 0; + return 0; +} +core_initcall(octeon_no_pci_init); + +static int __init octeon_no_pci_release(void) +{ + /* + * Release the allocated memory if a real IO space is there. 
+ */ + if ((unsigned long)octeon_dummy_iospace != mips_io_port_base) + vfree(octeon_dummy_iospace); + return 0; +} +late_initcall(octeon_no_pci_release); diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 138cc80c5928..24a2167db778 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -255,8 +255,6 @@ static void octeon_cpus_done(void) /* State of each CPU. */ DEFINE_PER_CPU(int, cpu_state); -extern void fixup_irqs(void); - static int octeon_cpu_disable(void) { unsigned int cpu = smp_processor_id(); @@ -267,7 +265,7 @@ static int octeon_cpu_disable(void) set_cpu_online(cpu, false); cpu_clear(cpu, cpu_callin_map); local_irq_disable(); - fixup_irqs(); + octeon_fixup_irqs(); local_irq_enable(); flush_cache_all(); diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig new file mode 100644 index 000000000000..8987846240f7 --- /dev/null +++ b/arch/mips/configs/xway_defconfig @@ -0,0 +1,159 @@ +CONFIG_LANTIQ=y +CONFIG_XRX200_PHY_FW=y +CONFIG_CPU_MIPS32_R2=y +# CONFIG_COMPACTION is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HZ_100=y +# CONFIG_SECCOMP is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_GZIP is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_AIO is not set +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_COREDUMP is not set +# CONFIG_SUSPEND is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# 
CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +# CONFIG_BRIDGE_NETFILTER is not set +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_RAW=m +CONFIG_BRIDGE=y +# CONFIG_BRIDGE_IGMP_SNOOPING is not set +CONFIG_VLAN_8021Q=y +CONFIG_NET_SCHED=y +CONFIG_HAMRADIO=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PHYSMAP=y +CONFIG_MTD_PHYSMAP_OF=y +CONFIG_MTD_LANTIQ=y +CONFIG_EEPROM_93CX6=m +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +CONFIG_LANTIQ_ETOP=y +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_PHYLIB=y +CONFIG_PPP=m +CONFIG_PPP_FILTER=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_ISDN=y +CONFIG_INPUT=m +CONFIG_INPUT_POLLDEV=m +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_MISC=y +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y 
+CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SPI=y +CONFIG_GPIO_MM_LANTIQ=y +CONFIG_GPIO_STP_XWAY=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +# CONFIG_HID is not set +# CONFIG_USB_HID is not set +CONFIG_USB=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_STORAGE=y +CONFIG_USB_STORAGE_DEBUG=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_STAGING=y +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_DNOTIFY is not set +# CONFIG_PROC_PAGE_MONITOR is not set +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_SUMMARY=y +CONFIG_JFFS2_FS_XATTR=y +# CONFIG_JFFS2_FS_POSIX_ACL is not set +# CONFIG_JFFS2_FS_SECURITY is not set +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +# CONFIG_JFFS2_ZLIB is not set +CONFIG_SQUASHFS=y +# CONFIG_SQUASHFS_ZLIB is not set +CONFIG_SQUASHFS_XZ=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CRYPTO_MANAGER=m +CONFIG_CRYPTO_ARC4=m +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRC_ITU_T=m +CONFIG_CRC32_SARWATE=y +CONFIG_AVERAGE=y diff --git a/arch/mips/dec/ioasic-irq.c b/arch/mips/dec/ioasic-irq.c index 824e08c73798..4b3e3a4375a6 100644 --- a/arch/mips/dec/ioasic-irq.c +++ b/arch/mips/dec/ioasic-irq.c @@ -51,6 +51,14 @@ static struct irq_chip ioasic_irq_type = { .irq_unmask = unmask_ioasic_irq, }; +void clear_ioasic_dma_irq(unsigned int irq) +{ + u32 sir; + + sir = ~(1 << (irq - ioasic_irq_base)); + ioasic_write(IO_REG_SIR, sir); +} + static struct irq_chip ioasic_dma_irq_type = { .name = "IO-ASIC-DMA", .irq_ack = ack_ioasic_irq, diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c index ab169046e442..468f665de7bb 100644 --- a/arch/mips/dec/prom/init.c +++ b/arch/mips/dec/prom/init.c @@ -13,6 +13,7 @@ 
#include <asm/bootinfo.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/processor.h> #include <asm/dec/prom.h> diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index ea57f39e6736..1914e56f0d96 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -125,13 +125,18 @@ int rtc_mips_set_mmss(unsigned long nowtime) void __init plat_time_init(void) { + int ioasic_clock = 0; u32 start, end; - int i = HZ / 10; + int i = HZ / 8; /* Set up the rate of periodic DS1287 interrupts. */ ds1287_set_base_clock(HZ); + /* On some I/O ASIC systems we have the I/O ASIC's counter. */ + if (IOASIC) + ioasic_clock = dec_ioasic_clocksource_init() == 0; if (cpu_has_counter) { + ds1287_timer_state(); while (!ds1287_timer_state()) ; @@ -143,12 +148,24 @@ void __init plat_time_init(void) end = read_c0_count(); - mips_hpt_frequency = (end - start) * 10; + mips_hpt_frequency = (end - start) * 8; printk(KERN_INFO "MIPS counter frequency %dHz\n", mips_hpt_frequency); - } else if (IOASIC) - /* For pre-R4k systems we use the I/O ASIC's counter. */ - dec_ioasic_clocksource_init(); + + /* + * All R4k DECstations suffer from the CP0 Count erratum, + * so we can't use the timer as a clock source, and a clock + * event both at a time. An accurate wall clock is more + * important than a high-precision interval timer so only + * use the timer as a clock source, and not a clock event + * if there's no I/O ASIC counter available to serve as a + * clock source. 
+ */ + if (!ioasic_clock) { + init_r4k_clocksource(); + mips_hpt_frequency = 0; + } + } ds1287_clockevent_init(dec_interrupt[DEC_IRQ_RTC]); } diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 9b54b7a403d4..454ddf9bb76f 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,2 +1,15 @@ # MIPS headers +generic-y += cputime.h +generic-y += current.h +generic-y += emergency-restart.h +generic-y += local64.h +generic-y += mutex.h +generic-y += parport.h +generic-y += percpu.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += segment.h +generic-y += serial.h generic-y += trace_clock.h +generic-y += ucontext.h +generic-y += xor.h diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index 552a65a0cf2b..27bd060d716e 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -65,44 +65,33 @@ static inline unsigned long bmips_read_zscm_reg(unsigned int offset) { unsigned long ret; - __asm__ __volatile__( - ".set push\n" - ".set noreorder\n" - "cache %1, 0(%2)\n" - "sync\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "mfc0 %0, $28, 3\n" - "_ssnop\n" - ".set pop\n" - : "=&r" (ret) - : "i" (Index_Load_Tag_S), "r" (ZSCM_REG_BASE + offset) - : "memory"); + barrier(); + cache_op(Index_Load_Tag_S, ZSCM_REG_BASE + offset); + __sync(); + _ssnop(); + _ssnop(); + _ssnop(); + _ssnop(); + _ssnop(); + _ssnop(); + _ssnop(); + ret = read_c0_ddatalo(); + _ssnop(); + return ret; } static inline void bmips_write_zscm_reg(unsigned int offset, unsigned long data) { - __asm__ __volatile__( - ".set push\n" - ".set noreorder\n" - "mtc0 %0, $28, 3\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - "cache %1, 0(%2)\n" - "_ssnop\n" - "_ssnop\n" - "_ssnop\n" - : /* no outputs */ - : "r" (data), - "i" (Index_Store_Tag_S), "r" (ZSCM_REG_BASE + offset) - : "memory"); + write_c0_ddatalo(data); + _ssnop(); + _ssnop(); + _ssnop(); + 
cache_op(Index_Store_Tag_S, ZSCM_REG_BASE + offset); + _ssnop(); + _ssnop(); + _ssnop(); + barrier(); } #endif /* !defined(__ASSEMBLY__) */ diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index fa44f3ec5302..51680d15ca8e 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -13,12 +13,6 @@ #include <asm/cpu-info.h> #include <cpu-feature-overrides.h> -#ifndef current_cpu_type -#define current_cpu_type() current_cpu_data.cputype -#endif - -#define boot_cpu_type() cpu_data[0].cputype - /* * SMP assumption: Options of CPU 0 are a superset of all processors. * This is true for all known MIPS systems. diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 41401d8eb7d1..21c8e29c8f91 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -84,6 +84,7 @@ struct cpuinfo_mips { extern struct cpuinfo_mips cpu_data[]; #define current_cpu_data cpu_data[smp_processor_id()] #define raw_current_cpu_data cpu_data[raw_smp_processor_id()] +#define boot_cpu_data cpu_data[0] extern void cpu_probe(void); extern void cpu_report(void); diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h new file mode 100644 index 000000000000..4a402cc60c03 --- /dev/null +++ b/arch/mips/include/asm/cpu-type.h @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. 
Rozycki + */ +#ifndef __ASM_CPU_TYPE_H +#define __ASM_CPU_TYPE_H + +#include <linux/smp.h> +#include <linux/compiler.h> + +static inline int __pure __get_cpu_type(const int cpu_type) +{ + switch (cpu_type) { +#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \ + defined(CONFIG_SYS_HAS_CPU_LOONGSON2F) + case CPU_LOONGSON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B + case CPU_LOONGSON1: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1 + case CPU_4KC: + case CPU_ALCHEMY: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_PR4450: + case CPU_BMIPS32: + case CPU_JZRISC: +#endif + +#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) + case CPU_4KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2 + case CPU_4KSC: + case CPU_24K: + case CPU_34K: + case CPU_1004K: + case CPU_74K: + case CPU_M14KC: + case CPU_M14KEC: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1 + case CPU_5KC: + case CPU_5KE: + case CPU_20KC: + case CPU_25KF: + case CPU_SB1: + case CPU_SB1A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS64_R2 + /* + * All MIPS64 R2 processors have their own special symbols. 
That is, + * there currently is no pure R2 core + */ +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R3000 + case CPU_R2000: + case CPU_R3000: + case CPU_R3000A: + case CPU_R3041: + case CPU_R3051: + case CPU_R3052: + case CPU_R3081: + case CPU_R3081E: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX39XX + case CPU_TX3912: + case CPU_TX3922: + case CPU_TX3927: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_VR41XX + case CPU_VR41XX: + case CPU_VR4111: + case CPU_VR4121: + case CPU_VR4122: + case CPU_VR4131: + case CPU_VR4133: + case CPU_VR4181: + case CPU_VR4181A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4300 + case CPU_R4300: + case CPU_R4310: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R4X00 + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4200: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4600: + case CPU_R4700: + case CPU_R4640: + case CPU_R4650: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_TX49XX + case CPU_TX49XX: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5000 + case CPU_R5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5432 + case CPU_R5432: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R5500 + case CPU_R5500: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R6000 + case CPU_R6000: + case CPU_R6000A: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_NEVADA + case CPU_NEVADA: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R8000 + case CPU_R8000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_R10000 + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM7000 + case CPU_RM7000: + case CPU_SR71000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_RM9000 + case CPU_RM9000: +#endif +#ifdef CONFIG_SYS_HAS_CPU_SB1 + case CPU_SB1: + case CPU_SB1A: +#endif +#ifdef CONFIG_SYS_HAS_CPU_CAVIUM_OCTEON + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS4380 + case CPU_BMIPS4380: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_BMIPS5000 + case CPU_BMIPS5000: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_XLP + case CPU_XLP: +#endif + +#ifdef 
CONFIG_SYS_HAS_CPU_XLR + case CPU_XLR: +#endif + break; + default: + unreachable(); + } + + return cpu_type; +} + +static inline int __pure current_cpu_type(void) +{ + const int cpu_type = current_cpu_data.cputype; + + return __get_cpu_type(cpu_type); +} + +static inline int __pure boot_cpu_type(void) +{ + const int cpu_type = cpu_data[0].cputype; + + return __get_cpu_type(cpu_type); +} + +#endif /* __ASM_CPU_TYPE_H */ diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 632bbe5a79ea..d2035e16502a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -3,15 +3,14 @@ * various MIPS cpu types. * * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 2004 Maciej W. Rozycki + * Copyright (C) 2004, 2013 Maciej W. Rozycki */ #ifndef _ASM_CPU_H #define _ASM_CPU_H -/* Assigned Company values for bits 23:16 of the PRId Register - (CP0 register 15, select 0). As of the MIPS32 and MIPS64 specs from - MTI, the PRId register is defined in this (backwards compatible) - way: +/* + As of the MIPS32 and MIPS64 specs from MTI, the PRId register (CP0 + register 15, select 0) is defined in this (backwards compatible) way: +----------------+----------------+----------------+----------------+ | Company Options| Company ID | Processor ID | Revision | @@ -23,6 +22,14 @@ spec. */ +#define PRID_OPT_MASK 0xff000000 + +/* + * Assigned Company values for bits 23:16 of the PRId register. + */ + +#define PRID_COMP_MASK 0xff0000 + #define PRID_COMP_LEGACY 0x000000 #define PRID_COMP_MIPS 0x010000 #define PRID_COMP_BROADCOM 0x020000 @@ -38,10 +45,17 @@ #define PRID_COMP_INGENIC 0xd00000 /* - * Assigned values for the product ID register. In order to detect a - * certain CPU type exactly eventually additional registers may need to - * be examined. These are valid when 23:16 == PRID_COMP_LEGACY + * Assigned Processor ID (implementation) values for bits 15:8 of the PRId + * register. 
In order to detect a certain CPU type exactly eventually + * additional registers may need to be examined. */ + +#define PRID_IMP_MASK 0xff00 + +/* + * These are valid when 23:16 == PRID_COMP_LEGACY + */ + #define PRID_IMP_R2000 0x0100 #define PRID_IMP_AU1_REV1 0x0100 #define PRID_IMP_AU1_REV2 0x0200 @@ -141,6 +155,9 @@ #define PRID_IMP_CAVIUM_CN68XX 0x9100 #define PRID_IMP_CAVIUM_CN66XX 0x9200 #define PRID_IMP_CAVIUM_CN61XX 0x9300 +#define PRID_IMP_CAVIUM_CNF71XX 0x9400 +#define PRID_IMP_CAVIUM_CN78XX 0x9500 +#define PRID_IMP_CAVIUM_CN70XX 0x9600 /* * These are the PRID's for when 23:16 == PRID_COMP_INGENIC @@ -176,13 +193,18 @@ #define PRID_IMP_NETLOGIC_XLP8XX 0x1000 #define PRID_IMP_NETLOGIC_XLP3XX 0x1100 +#define PRID_IMP_NETLOGIC_XLP2XX 0x1200 /* - * Definitions for 7:0 on legacy processors + * Particular Revision values for bits 7:0 of the PRId register. */ #define PRID_REV_MASK 0x00ff +/* + * Definitions for 7:0 on legacy processors + */ + #define PRID_REV_TX4927 0x0022 #define PRID_REV_TX4937 0x0030 #define PRID_REV_R4400 0x0040 @@ -223,6 +245,8 @@ * 31 16 15 8 7 0 */ +#define FPIR_IMP_MASK 0xff00 + #define FPIR_IMP_NONE 0x0000 enum cpu_type_enum { @@ -272,7 +296,7 @@ enum cpu_type_enum { */ CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2, - CPU_XLR, CPU_XLP, + CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP, CPU_LAST }; diff --git a/arch/mips/include/asm/cputime.h b/arch/mips/include/asm/cputime.h deleted file mode 100644 index c00eacbdd979..000000000000 --- a/arch/mips/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __MIPS_CPUTIME_H -#define __MIPS_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __MIPS_CPUTIME_H */ diff --git a/arch/mips/include/asm/current.h b/arch/mips/include/asm/current.h deleted file mode 100644 index 4c51401b5537..000000000000 --- a/arch/mips/include/asm/current.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/current.h> diff --git 
a/arch/mips/include/asm/dec/ioasic.h b/arch/mips/include/asm/dec/ioasic.h index 98badd6bf22d..a6e505a0e44b 100644 --- a/arch/mips/include/asm/dec/ioasic.h +++ b/arch/mips/include/asm/dec/ioasic.h @@ -31,8 +31,10 @@ static inline u32 ioasic_read(unsigned int reg) return ioasic_base[reg / 4]; } +extern void clear_ioasic_dma_irq(unsigned int irq); + extern void init_ioasic_irqs(int base); -extern void dec_ioasic_clocksource_init(void); +extern int dec_ioasic_clocksource_init(void); #endif /* __ASM_DEC_IOASIC_H */ diff --git a/arch/mips/include/asm/emergency-restart.h b/arch/mips/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/mips/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/mips/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index ddb947e9221f..0089a740e5ae 100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -42,8 +42,6 @@ #define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 -#define cpu_has_dsp 0 -#define cpu_has_dsp2 0 #define cpu_has_mipsmt 0 #define cpu_has_64bits 0 diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h index 3e11a468cdf8..54f9e84db8ac 100644 --- a/arch/mips/include/asm/mach-au1x00/au1000.h +++ b/arch/mips/include/asm/mach-au1x00/au1000.h @@ -43,6 +43,8 @@ #include <linux/io.h> #include <linux/irq.h> +#include <asm/cpu.h> + /* cpu pipeline flush */ void static inline au_sync(void) { @@ 
-140,7 +142,7 @@ static inline int au1xxx_cpu_needs_config_od(void) static inline int alchemy_get_cputype(void) { - switch (read_c0_prid() & 0xffff0000) { + switch (read_c0_prid() & (PRID_OPT_MASK | PRID_COMP_MASK)) { case 0x00030000: return ALCHEMY_CPU_AU1000; break; diff --git a/arch/mips/include/asm/mach-cavium-octeon/gpio.h b/arch/mips/include/asm/mach-cavium-octeon/gpio.h new file mode 100644 index 000000000000..34e9f7aabab4 --- /dev/null +++ b/arch/mips/include/asm/mach-cavium-octeon/gpio.h @@ -0,0 +1,21 @@ +#ifndef __ASM_MACH_CAVIUM_OCTEON_GPIO_H +#define __ASM_MACH_CAVIUM_OCTEON_GPIO_H + +#ifdef CONFIG_GPIOLIB +#define gpio_get_value __gpio_get_value +#define gpio_set_value __gpio_set_value +#define gpio_cansleep __gpio_cansleep +#else +int gpio_request(unsigned gpio, const char *label); +void gpio_free(unsigned gpio); +int gpio_direction_input(unsigned gpio); +int gpio_direction_output(unsigned gpio, int value); +int gpio_get_value(unsigned gpio); +void gpio_set_value(unsigned gpio, int value); +#endif + +#include <asm-generic/gpio.h> + +#define gpio_to_irq __gpio_to_irq + +#endif /* __ASM_MACH_GENERIC_GPIO_H */ diff --git a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h index f4caacd25552..1bcb6421205e 100644 --- a/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip22/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP22_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP22 with a variety of processors so we can't use defaults for everything. 
*/ diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 1d2b6ff60d33..d6111aa2e886 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -8,6 +8,8 @@ #ifndef __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP27 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h index 65e9c856390d..4cec06d133db 100644 --- a/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip28/cpu-feature-overrides.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H #define __ASM_MACH_IP28_CPU_FEATURE_OVERRIDES_H +#include <asm/cpu.h> + /* * IP28 only comes with R10000 family processors all using the same config */ diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h new file mode 100644 index 000000000000..096a10072430 --- /dev/null +++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h @@ -0,0 +1,58 @@ +/* + * Lantiq FALCON specific CPU feature overrides + * + * Copyright (C) 2013 Thomas Langer, Lantiq Deutschland + * + * This file was derived from: include/asm-mips/cpu-features.h + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. Rozycki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + */ +#ifndef __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H + +#define cpu_has_tlb 1 +#define cpu_has_4kex 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_tx39_cache 0 +#define cpu_has_sb1_cache 0 +#define cpu_has_fpu 0 +#define cpu_has_32fpr 0 +#define cpu_has_counter 1 +#define cpu_has_watch 1 +#define cpu_has_divec 1 + +#define cpu_has_prefetch 1 +#define cpu_has_ejtag 1 +#define cpu_has_llsc 1 + +#define cpu_has_mips16 1 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 + +#define cpu_has_mips32r1 1 +#define cpu_has_mips32r2 1 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 + +#define cpu_has_dsp 1 +#define cpu_has_mipsmt 1 + +#define cpu_has_vint 1 +#define cpu_has_veic 1 + +#define cpu_has_64bits 0 +#define cpu_has_64bit_zero_reg 0 +#define cpu_has_64bit_gp_regs 0 +#define cpu_has_64bit_addresses 0 + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 32 + +#endif /* __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-ralink/mt7620.h b/arch/mips/include/asm/mach-ralink/mt7620.h index 9809972ea882..6f9b24f51157 100644 --- a/arch/mips/include/asm/mach-ralink/mt7620.h +++ b/arch/mips/include/asm/mach-ralink/mt7620.h @@ -20,6 +20,8 @@ #define SYSC_REG_CHIP_REV 0x0c #define SYSC_REG_SYSTEM_CONFIG0 0x10 #define SYSC_REG_SYSTEM_CONFIG1 0x14 +#define SYSC_REG_CLKCFG0 0x2c +#define SYSC_REG_CPU_SYS_CLKCFG 0x3c #define SYSC_REG_CPLL_CONFIG0 0x54 #define SYSC_REG_CPLL_CONFIG1 0x58 @@ -29,20 +31,42 @@ #define MT7620A_CHIP_NAME0 0x3637544d #define MT7620A_CHIP_NAME1 0x20203032 +#define SYSCFG0_XTAL_FREQ_SEL BIT(6) + #define CHIP_REV_PKG_MASK 0x1 #define CHIP_REV_PKG_SHIFT 16 #define CHIP_REV_VER_MASK 0xf #define CHIP_REV_VER_SHIFT 8 #define CHIP_REV_ECO_MASK 0xf -#define CPLL_SW_CONFIG_SHIFT 31 -#define CPLL_SW_CONFIG_MASK 0x1 -#define CPLL_CPU_CLK_SHIFT 24 -#define CPLL_CPU_CLK_MASK 0x1 -#define 
CPLL_MULT_RATIO_SHIFT 16 -#define CPLL_MULT_RATIO 0x7 -#define CPLL_DIV_RATIO_SHIFT 10 -#define CPLL_DIV_RATIO 0x3 +#define CLKCFG0_PERI_CLK_SEL BIT(4) + +#define CPU_SYS_CLKCFG_OCP_RATIO_SHIFT 16 +#define CPU_SYS_CLKCFG_OCP_RATIO_MASK 0xf +#define CPU_SYS_CLKCFG_OCP_RATIO_1 0 /* 1:1 (Reserved) */ +#define CPU_SYS_CLKCFG_OCP_RATIO_1_5 1 /* 1:1.5 (Reserved) */ +#define CPU_SYS_CLKCFG_OCP_RATIO_2 2 /* 1:2 */ +#define CPU_SYS_CLKCFG_OCP_RATIO_2_5 3 /* 1:2.5 (Reserved) */ +#define CPU_SYS_CLKCFG_OCP_RATIO_3 4 /* 1:3 */ +#define CPU_SYS_CLKCFG_OCP_RATIO_3_5 5 /* 1:3.5 (Reserved) */ +#define CPU_SYS_CLKCFG_OCP_RATIO_4 6 /* 1:4 */ +#define CPU_SYS_CLKCFG_OCP_RATIO_5 7 /* 1:5 */ +#define CPU_SYS_CLKCFG_OCP_RATIO_10 8 /* 1:10 */ +#define CPU_SYS_CLKCFG_CPU_FDIV_SHIFT 8 +#define CPU_SYS_CLKCFG_CPU_FDIV_MASK 0x1f +#define CPU_SYS_CLKCFG_CPU_FFRAC_SHIFT 0 +#define CPU_SYS_CLKCFG_CPU_FFRAC_MASK 0x1f + +#define CPLL_CFG0_SW_CFG BIT(31) +#define CPLL_CFG0_PLL_MULT_RATIO_SHIFT 16 +#define CPLL_CFG0_PLL_MULT_RATIO_MASK 0x7 +#define CPLL_CFG0_LC_CURFCK BIT(15) +#define CPLL_CFG0_BYPASS_REF_CLK BIT(14) +#define CPLL_CFG0_PLL_DIV_RATIO_SHIFT 10 +#define CPLL_CFG0_PLL_DIV_RATIO_MASK 0x3 + +#define CPLL_CFG1_CPU_AUX1 BIT(25) +#define CPLL_CFG1_CPU_AUX0 BIT(24) #define SYSCFG0_DRAM_TYPE_MASK 0x3 #define SYSCFG0_DRAM_TYPE_SHIFT 4 diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h new file mode 100644 index 000000000000..f7bb8cfc5eb1 --- /dev/null +++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h @@ -0,0 +1,57 @@ +/* + * Ralink MT7620 specific CPU feature overrides + * + * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org> + * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org> + * + * This file was derived from: include/asm-mips/cpu-features.h + * Copyright (C) 2003, 2004 Ralf Baechle + * Copyright (C) 2004 Maciej W. 
Rozycki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + */ +#ifndef _MT7620_CPU_FEATURE_OVERRIDES_H +#define _MT7620_CPU_FEATURE_OVERRIDES_H + +#define cpu_has_tlb 1 +#define cpu_has_4kex 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_tx39_cache 0 +#define cpu_has_sb1_cache 0 +#define cpu_has_fpu 0 +#define cpu_has_32fpr 0 +#define cpu_has_counter 1 +#define cpu_has_watch 1 +#define cpu_has_divec 1 + +#define cpu_has_prefetch 1 +#define cpu_has_ejtag 1 +#define cpu_has_llsc 1 + +#define cpu_has_mips16 1 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 + +#define cpu_has_mips32r1 1 +#define cpu_has_mips32r2 1 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 + +#define cpu_has_dsp 1 +#define cpu_has_dsp2 0 +#define cpu_has_mipsmt 0 + +#define cpu_has_64bits 0 +#define cpu_has_64bit_zero_reg 0 +#define cpu_has_64bit_gp_regs 0 +#define cpu_has_64bit_addresses 0 + +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 32 + +#endif /* _MT7620_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index fed1c3e9b486..e0331414c7d6 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -603,6 +603,13 @@ #define MIPS_CONF4_MMUEXTDEF (_ULCAST_(3) << 14) #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14) +#define MIPS_CONF5_NF (_ULCAST_(1) << 0) +#define MIPS_CONF5_UFR (_ULCAST_(1) << 2) +#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) +#define MIPS_CONF5_EVA (_ULCAST_(1) << 28) +#define MIPS_CONF5_CV (_ULCAST_(1) << 29) +#define MIPS_CONF5_K (_ULCAST_(1) << 30) + #define MIPS_CONF6_SYND (_ULCAST_(1) << 13) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) diff --git a/arch/mips/include/asm/mutex.h b/arch/mips/include/asm/mutex.h deleted file mode 100644 index 
458c1f7fbc18..000000000000 --- a/arch/mips/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include <asm-generic/mutex-dec.h> diff --git a/arch/mips/include/asm/netlogic/xlp-hal/bridge.h b/arch/mips/include/asm/netlogic/xlp-hal/bridge.h index 790f0f1e55c6..4e8eacb9588a 100644 --- a/arch/mips/include/asm/netlogic/xlp-hal/bridge.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/bridge.h @@ -88,6 +88,7 @@ #define BRIDGE_DRAM_LIMIT6 0x22 #define BRIDGE_DRAM_LIMIT7 0x23 +#define BRIDGE_DRAM_NODE_TRANSLN(i) (0x24 + (i)) #define BRIDGE_DRAM_NODE_TRANSLN0 0x24 #define BRIDGE_DRAM_NODE_TRANSLN1 0x25 #define BRIDGE_DRAM_NODE_TRANSLN2 0x26 @@ -96,6 +97,8 @@ #define BRIDGE_DRAM_NODE_TRANSLN5 0x29 #define BRIDGE_DRAM_NODE_TRANSLN6 0x2a #define BRIDGE_DRAM_NODE_TRANSLN7 0x2b + +#define BRIDGE_DRAM_CHNL_TRANSLN(i) (0x2c + (i)) #define BRIDGE_DRAM_CHNL_TRANSLN0 0x2c #define BRIDGE_DRAM_CHNL_TRANSLN1 0x2d #define BRIDGE_DRAM_CHNL_TRANSLN2 0x2e @@ -104,6 +107,7 @@ #define BRIDGE_DRAM_CHNL_TRANSLN5 0x31 #define BRIDGE_DRAM_CHNL_TRANSLN6 0x32 #define BRIDGE_DRAM_CHNL_TRANSLN7 0x33 + #define BRIDGE_PCIEMEM_BASE0 0x34 #define BRIDGE_PCIEMEM_BASE1 0x35 #define BRIDGE_PCIEMEM_BASE2 0x36 diff --git a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h index 9fac46fb7913..55eee77adaca 100644 --- a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h @@ -72,6 +72,12 @@ #define XLP_IO_USB_OHCI2_OFFSET(node) XLP_HDR_OFFSET(node, 0, 2, 4) #define XLP_IO_USB_OHCI3_OFFSET(node) XLP_HDR_OFFSET(node, 0, 2, 5) +/* XLP2xx has an updated USB block */ +#define XLP2XX_IO_USB_OFFSET(node, i) XLP_HDR_OFFSET(node, 0, 4, i) +#define 
XLP2XX_IO_USB_XHCI0_OFFSET(node) XLP_HDR_OFFSET(node, 0, 4, 1) +#define XLP2XX_IO_USB_XHCI1_OFFSET(node) XLP_HDR_OFFSET(node, 0, 4, 2) +#define XLP2XX_IO_USB_XHCI2_OFFSET(node) XLP_HDR_OFFSET(node, 0, 4, 3) + #define XLP_IO_NAE_OFFSET(node) XLP_HDR_OFFSET(node, 0, 3, 0) #define XLP_IO_POE_OFFSET(node) XLP_HDR_OFFSET(node, 0, 3, 1) @@ -88,6 +94,9 @@ #define XLP_IO_I2C0_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 2) #define XLP_IO_I2C1_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 3) #define XLP_IO_GPIO_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 4) +/* on 2XX, all I2C busses are on the same block */ +#define XLP2XX_IO_I2C_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 7) + /* system management */ #define XLP_IO_SYS_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 5) #define XLP_IO_JTAG_OFFSET(node) XLP_HDR_OFFSET(node, 0, 6, 6) @@ -145,6 +154,7 @@ #define PCI_DEVICE_ID_NLM_NOR 0x1015 #define PCI_DEVICE_ID_NLM_NAND 0x1016 #define PCI_DEVICE_ID_NLM_MMC 0x1018 +#define PCI_DEVICE_ID_NLM_XHCI 0x101d #ifndef __ASSEMBLY__ diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pic.h b/arch/mips/include/asm/netlogic/xlp-hal/pic.h index 4b5108dfaa16..105389b79f09 100644 --- a/arch/mips/include/asm/netlogic/xlp-hal/pic.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/pic.h @@ -208,13 +208,14 @@ #define PIC_LOCAL_SCHEDULING 1 #define PIC_GLOBAL_SCHEDULING 0 -#define PIC_CLK_HZ 133333333 - #define nlm_read_pic_reg(b, r) nlm_read_reg64(b, r) #define nlm_write_pic_reg(b, r, v) nlm_write_reg64(b, r, v) #define nlm_get_pic_pcibase(node) nlm_pcicfg_base(XLP_IO_PIC_OFFSET(node)) #define nlm_get_pic_regbase(node) (nlm_get_pic_pcibase(node) + XLP_IO_PCI_HDRSZ) +/* We use PIC on node 0 as a timer */ +#define pic_timer_freq() nlm_get_pic_frequency(0) + /* IRT and h/w interrupt routines */ static inline int nlm_pic_read_irt(uint64_t base, int irt_index) diff --git a/arch/mips/include/asm/netlogic/xlp-hal/sys.h b/arch/mips/include/asm/netlogic/xlp-hal/sys.h index 470e52bfc061..fcf2833c16ca 100644 --- 
a/arch/mips/include/asm/netlogic/xlp-hal/sys.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/sys.h @@ -117,6 +117,36 @@ #define SYS_SCRTCH2 0x4b #define SYS_SCRTCH3 0x4c +/* PLL registers XLP2XX */ +#define SYS_PLL_CTRL0 0x240 +#define SYS_PLL_CTRL1 0x241 +#define SYS_PLL_CTRL2 0x242 +#define SYS_PLL_CTRL3 0x243 +#define SYS_DMC_PLL_CTRL0 0x244 +#define SYS_DMC_PLL_CTRL1 0x245 +#define SYS_DMC_PLL_CTRL2 0x246 +#define SYS_DMC_PLL_CTRL3 0x247 + +#define SYS_PLL_CTRL0_DEVX(x) (0x248 + (x) * 4) +#define SYS_PLL_CTRL1_DEVX(x) (0x249 + (x) * 4) +#define SYS_PLL_CTRL2_DEVX(x) (0x24a + (x) * 4) +#define SYS_PLL_CTRL3_DEVX(x) (0x24b + (x) * 4) + +#define SYS_CPU_PLL_CHG_CTRL 0x288 +#define SYS_PLL_CHG_CTRL 0x289 +#define SYS_CLK_DEV_DIS 0x28a +#define SYS_CLK_DEV_SEL 0x28b +#define SYS_CLK_DEV_DIV 0x28c +#define SYS_CLK_DEV_CHG 0x28d +#define SYS_CLK_DEV_SEL_REG 0x28e +#define SYS_CLK_DEV_DIV_REG 0x28f +#define SYS_CPU_PLL_LOCK 0x29f +#define SYS_SYS_PLL_LOCK 0x2a0 +#define SYS_PLL_MEM_CMD 0x2a1 +#define SYS_CPU_PLL_MEM_REQ 0x2a2 +#define SYS_SYS_PLL_MEM_REQ 0x2a3 +#define SYS_PLL_MEM_STAT 0x2a4 + #ifndef __ASSEMBLY__ #define nlm_read_sys_reg(b, r) nlm_read_reg(b, r) @@ -124,5 +154,6 @@ #define nlm_get_sys_pcibase(node) nlm_pcicfg_base(XLP_IO_SYS_OFFSET(node)) #define nlm_get_sys_regbase(node) (nlm_get_sys_pcibase(node) + XLP_IO_PCI_HDRSZ) +unsigned int nlm_get_pic_frequency(int node); #endif #endif diff --git a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h index f4ea0f7f3965..17daffb280a3 100644 --- a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h +++ b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h @@ -41,15 +41,22 @@ #define PIC_PCIE_LINK_1_IRQ 20 #define PIC_PCIE_LINK_2_IRQ 21 #define PIC_PCIE_LINK_3_IRQ 22 + #define PIC_EHCI_0_IRQ 23 #define PIC_EHCI_1_IRQ 24 #define PIC_OHCI_0_IRQ 25 #define PIC_OHCI_1_IRQ 26 #define PIC_OHCI_2_IRQ 27 #define PIC_OHCI_3_IRQ 28 +#define PIC_2XX_XHCI_0_IRQ 23 +#define PIC_2XX_XHCI_1_IRQ 24 
+#define PIC_2XX_XHCI_2_IRQ 25 + #define PIC_MMC_IRQ 29 #define PIC_I2C_0_IRQ 30 #define PIC_I2C_1_IRQ 31 +#define PIC_I2C_2_IRQ 32 +#define PIC_I2C_3_IRQ 33 #ifndef __ASSEMBLY__ @@ -59,7 +66,17 @@ void xlp_wakeup_secondary_cpus(void); void xlp_mmu_init(void); void nlm_hal_init(void); +int xlp_get_dram_map(int n, uint64_t *dram_map); + +/* Device tree related */ void *xlp_dt_init(void *fdtp); +static inline int cpu_is_xlpii(void) +{ + int chip = read_c0_prid() & 0xff00; + + return chip == PRID_IMP_NETLOGIC_XLP2XX; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_NLM_XLP_H */ diff --git a/arch/mips/include/asm/netlogic/xlr/pic.h b/arch/mips/include/asm/netlogic/xlr/pic.h index 63c99176dffe..3c80a75233bd 100644 --- a/arch/mips/include/asm/netlogic/xlr/pic.h +++ b/arch/mips/include/asm/netlogic/xlr/pic.h @@ -36,6 +36,8 @@ #define _ASM_NLM_XLR_PIC_H #define PIC_CLK_HZ 66666666 +#define pic_timer_freq() PIC_CLK_HZ + /* PIC hardware interrupt numbers */ #define PIC_IRT_WD_INDEX 0 #define PIC_IRT_TIMER_0_INDEX 1 diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h index a2eed23c49a9..f5d77b91537f 100644 --- a/arch/mips/include/asm/octeon/octeon.h +++ b/arch/mips/include/asm/octeon/octeon.h @@ -251,4 +251,6 @@ extern void (*octeon_irq_setup_secondary)(void); typedef void (*octeon_irq_ip4_handler_t)(void); void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t); +extern void octeon_fixup_irqs(void); + #endif /* __ASM_OCTEON_OCTEON_H */ diff --git a/arch/mips/include/asm/parport.h b/arch/mips/include/asm/parport.h deleted file mode 100644 index cf252af64590..000000000000 --- a/arch/mips/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index f194c08bd057..12d6842962be 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -83,6 +83,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) extern int 
pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); +#define HAVE_ARCH_PCI_RESOURCE_TO_USER + +static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, + const struct resource *rsrc, resource_size_t *start, + resource_size_t *end) +{ + phys_t size = resource_size(rsrc); + + *start = fixup_bigphys_addr(rsrc->start, size); + *end = rsrc->start + size; +} + /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. diff --git a/arch/mips/include/asm/percpu.h b/arch/mips/include/asm/percpu.h deleted file mode 100644 index 844e763e9332..000000000000 --- a/arch/mips/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_PERCPU_H -#define __ASM_PERCPU_H - -#include <asm-generic/percpu.h> - -#endif /* __ASM_PERCPU_H */ diff --git a/arch/mips/include/asm/scatterlist.h b/arch/mips/include/asm/scatterlist.h deleted file mode 100644 index 7ee0e646d82c..000000000000 --- a/arch/mips/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SCATTERLIST_H -#define __ASM_SCATTERLIST_H - -#include <asm-generic/scatterlist.h> - -#endif /* __ASM_SCATTERLIST_H */ diff --git a/arch/mips/include/asm/sections.h b/arch/mips/include/asm/sections.h deleted file mode 100644 index b7e37262c246..000000000000 --- a/arch/mips/include/asm/sections.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SECTIONS_H -#define _ASM_SECTIONS_H - -#include <asm-generic/sections.h> - -#endif /* _ASM_SECTIONS_H */ diff --git a/arch/mips/include/asm/segment.h b/arch/mips/include/asm/segment.h deleted file mode 100644 index 92ac001fc483..000000000000 --- a/arch/mips/include/asm/segment.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SEGMENT_H -#define _ASM_SEGMENT_H - -/* Only here because we have some old header files that expect it.. 
*/ - -#endif /* _ASM_SEGMENT_H */ diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h deleted file mode 100644 index a0cb0caff152..000000000000 --- a/arch/mips/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 6529704aa73a..c5424757da65 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -10,7 +10,9 @@ #ifdef __KERNEL__ +#include <asm/cpu-features.h> #include <asm/mipsregs.h> +#include <asm/cpu-type.h> /* * This is the clock rate of the i8253 PIT. A MIPS system may not have @@ -33,9 +35,38 @@ typedef unsigned int cycles_t; +/* + * On R4000/R4400 before version 5.0 an erratum exists such that if the + * cycle counter is read in the exact moment that it is matching the + * compare register, no interrupt will be generated. + * + * There is a suggested workaround and also the erratum can't strike if + * the compare interrupt isn't being used as the clock source device. + * However for now the implementation of this function doesn't get these + * fine details right.
+ */ static inline cycles_t get_cycles(void) { - return 0; + switch (boot_cpu_type()) { + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + if ((read_c0_prid() & 0xff) >= 0x0050) + return read_c0_count(); + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + break; + + default: + if (cpu_has_counter) + return read_c0_count(); + break; + } + + return 0; /* no usable counter */ } #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/ucontext.h b/arch/mips/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30fb..000000000000 --- a/arch/mips/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h index f4cff7e4fa8a..f82c83749a08 100644 --- a/arch/mips/include/asm/vga.h +++ b/arch/mips/include/asm/vga.h @@ -6,6 +6,7 @@ #ifndef _ASM_VGA_H #define _ASM_VGA_H +#include <asm/addrspace.h> #include <asm/byteorder.h> /* @@ -13,7 +14,7 @@ * access the videoram directly without any black magic. 
*/ -#define VGA_MAP_MEM(x, s) (0xb0000000L + (unsigned long)(x)) +#define VGA_MAP_MEM(x, s) CKSEG1ADDR(0x10000000L + (unsigned long)(x)) #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) diff --git a/arch/mips/include/asm/xor.h b/arch/mips/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/mips/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild index 350ccccadcb9..be7196eacb88 100644 --- a/arch/mips/include/uapi/asm/Kbuild +++ b/arch/mips/include/uapi/asm/Kbuild @@ -1,7 +1,9 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -header-y += auxvec.h +generic-y += auxvec.h +generic-y += ipcbuf.h + header-y += bitsperlong.h header-y += break.h header-y += byteorder.h @@ -11,7 +13,6 @@ header-y += fcntl.h header-y += inst.h header-y += ioctl.h header-y += ioctls.h -header-y += ipcbuf.h header-y += kvm_para.h header-y += mman.h header-y += msgbuf.h diff --git a/arch/mips/include/uapi/asm/auxvec.h b/arch/mips/include/uapi/asm/auxvec.h deleted file mode 100644 index 7cf7f2d21943..000000000000 --- a/arch/mips/include/uapi/asm/auxvec.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASM_AUXVEC_H -#define _ASM_AUXVEC_H - -#endif /* _ASM_AUXVEC_H */ diff --git a/arch/mips/include/uapi/asm/ipcbuf.h b/arch/mips/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/mips/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 4c6167a17875..5465dc183e5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -20,6 +20,7 @@ #include <asm/bugs.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/fpu.h> #include <asm/mipsregs.h> #include <asm/watch.h> @@ -55,7 +56,7 @@ static inline void check_errata(void) { struct cpuinfo_mips *c 
= &current_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: /* * Erratum "RPS May Cause Incorrect Instruction Execution" @@ -122,7 +123,7 @@ static inline unsigned long cpu_get_fpu_id(void) */ static inline int __cpu_has_fpu(void) { - return ((cpu_get_fpu_id() & 0xff00) != FPIR_IMP_NONE); + return ((cpu_get_fpu_id() & FPIR_IMP_MASK) != FPIR_IMP_NONE); } static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) @@ -290,6 +291,17 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) return config4 & MIPS_CONF_M; } +static inline unsigned int decode_config5(struct cpuinfo_mips *c) +{ + unsigned int config5; + + config5 = read_c0_config5(); + config5 &= ~MIPS_CONF5_UFR; + write_c0_config5(config5); + + return config5 & MIPS_CONF_M; +} + static void decode_configs(struct cpuinfo_mips *c) { int ok; @@ -310,6 +322,8 @@ static void decode_configs(struct cpuinfo_mips *c) ok = decode_config3(c); if (ok) ok = decode_config4(c); + if (ok) + ok = decode_config5(c); mips_probe_watch_registers(c); @@ -322,7 +336,7 @@ static void decode_configs(struct cpuinfo_mips *c) static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) { - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_R2000: c->cputype = CPU_R2000; __cpu_name[cpu] = "R2000"; @@ -333,7 +347,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->tlbsize = 64; break; case PRID_IMP_R3000: - if ((c->processor_id & 0xff) == PRID_REV_R3000A) { + if ((c->processor_id & PRID_REV_MASK) == PRID_REV_R3000A) { if (cpu_has_confreg()) { c->cputype = CPU_R3081E; __cpu_name[cpu] = "R3081"; @@ -353,7 +367,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_IMP_R4000: if (read_c0_config() & CONF_SC) { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400PC; __cpu_name[cpu] =
"R4400PC"; } else { @@ -361,7 +376,8 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "R4000PC"; } } else { - if ((c->processor_id & 0xff) >= PRID_REV_R4400) { + if ((c->processor_id & PRID_REV_MASK) >= + PRID_REV_R4400) { c->cputype = CPU_R4400SC; __cpu_name[cpu] = "R4400SC"; } else { @@ -454,7 +470,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "TX3927"; c->tlbsize = 64; } else { - switch (c->processor_id & 0xff) { + switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_TX3912: c->cputype = CPU_TX3912; __cpu_name[cpu] = "TX3912"; @@ -640,7 +656,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_4KC: c->cputype = CPU_4KC; __cpu_name[cpu] = "MIPS 4Kc"; @@ -711,7 +727,7 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_AU1_REV1: case PRID_IMP_AU1_REV2: c->cputype = CPU_ALCHEMY; @@ -730,7 +746,7 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu) break; case 4: __cpu_name[cpu] = "Au1200"; - if ((c->processor_id & 0xff) == 2) + if ((c->processor_id & PRID_REV_MASK) == 2) __cpu_name[cpu] = "Au1250"; break; case 5: @@ -748,12 +764,12 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SB1: c->cputype = CPU_SB1; __cpu_name[cpu] = "SiByte SB1"; /* FPU in pass1 is known to have issues. 
*/ - if ((c->processor_id & 0xff) < 0x02) + if ((c->processor_id & PRID_REV_MASK) < 0x02) c->options &= ~(MIPS_CPU_FPU | MIPS_CPU_32FPR); break; case PRID_IMP_SB1A: @@ -766,7 +782,7 @@ static inline void cpu_probe_sibyte(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_SR71000: c->cputype = CPU_SR71000; __cpu_name[cpu] = "Sandcraft SR71000"; @@ -779,7 +795,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_PR4450: c->cputype = CPU_PR4450; __cpu_name[cpu] = "Philips PR4450"; @@ -791,7 +807,7 @@ static inline void cpu_probe_nxp(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_BMIPS32_REV4: case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; @@ -806,7 +822,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) set_elf_platform(cpu, "bmips3300"); break; case PRID_IMP_BMIPS43XX: { - int rev = c->processor_id & 0xff; + int rev = c->processor_id & PRID_REV_MASK; if (rev >= PRID_REV_BMIPS4380_LO && rev <= PRID_REV_BMIPS4380_HI) { @@ -832,7 +848,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_CAVIUM_CN38XX: case PRID_IMP_CAVIUM_CN31XX: case PRID_IMP_CAVIUM_CN30XX: @@ -852,10 +868,17 @@ platform: case 
PRID_IMP_CAVIUM_CN63XX: case PRID_IMP_CAVIUM_CN66XX: case PRID_IMP_CAVIUM_CN68XX: + case PRID_IMP_CAVIUM_CNF71XX: c->cputype = CPU_CAVIUM_OCTEON2; __cpu_name[cpu] = "Cavium Octeon II"; set_elf_platform(cpu, "octeon2"); break; + case PRID_IMP_CAVIUM_CN70XX: + case PRID_IMP_CAVIUM_CN78XX: + c->cputype = CPU_CAVIUM_OCTEON3; + __cpu_name[cpu] = "Cavium Octeon III"; + set_elf_platform(cpu, "octeon3"); + break; default: printk(KERN_INFO "Unknown Octeon chip!\n"); c->cputype = CPU_UNKNOWN; @@ -868,7 +891,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) decode_configs(c); /* JZRISC does not implement the CP0 counter. */ c->options &= ~MIPS_CPU_COUNTER; - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { case PRID_IMP_JZRISC: c->cputype = CPU_JZRISC; __cpu_name[cpu] = "Ingenic JZRISC"; @@ -883,7 +906,7 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) { decode_configs(c); - if ((c->processor_id & 0xff00) == PRID_IMP_NETLOGIC_AU13XX) { + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_NETLOGIC_AU13XX) { c->cputype = CPU_ALCHEMY; __cpu_name[cpu] = "Au1300"; /* following stuff is not for Alchemy */ @@ -898,7 +921,12 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu) MIPS_CPU_EJTAG | MIPS_CPU_LLSC); - switch (c->processor_id & 0xff00) { + switch (c->processor_id & PRID_IMP_MASK) { + case PRID_IMP_NETLOGIC_XLP2XX: + c->cputype = CPU_XLP; + __cpu_name[cpu] = "Broadcom XLPII"; + break; + case PRID_IMP_NETLOGIC_XLP8XX: case PRID_IMP_NETLOGIC_XLP3XX: c->cputype = CPU_XLP; @@ -972,7 +1000,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_c0_prid(); - switch (c->processor_id & 0xff0000) { + switch (c->processor_id & PRID_COMP_MASK) { case PRID_COMP_LEGACY: cpu_probe_legacy(c, cpu); break; diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c index 0654bff9b69c..6cbbf6e106b9 100644 --- a/arch/mips/kernel/csrc-ioasic.c +++ 
b/arch/mips/kernel/csrc-ioasic.c @@ -37,13 +37,13 @@ static struct clocksource clocksource_dec = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -void __init dec_ioasic_clocksource_init(void) +int __init dec_ioasic_clocksource_init(void) { unsigned int freq; u32 start, end; - int i = HZ / 10; - + int i = HZ / 8; + ds1287_timer_state(); while (!ds1287_timer_state()) ; @@ -55,9 +55,15 @@ void __init dec_ioasic_clocksource_init(void) end = dec_ioasic_hpt_read(&clocksource_dec); - freq = (end - start) * 10; + freq = (end - start) * 8; + + /* An early revision of the I/O ASIC didn't have the counter. */ + if (!freq) + return -ENXIO; + printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq); clocksource_dec.rating = 200 + freq / 10000000; clocksource_register_hz(&clocksource_dec, freq); + return 0; } diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 0c655deeea4a..f7991d95bff9 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <asm/cpu.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include <asm/idle.h> #include <asm/mipsregs.h> @@ -136,7 +137,7 @@ void __init check_wait(void) return; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R3081: case CPU_R3081E: cpu_wait = r3081_wait; @@ -166,6 +167,7 @@ void __init check_wait(void) case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: case CPU_JZRISC: case CPU_LOONGSON1: case CPU_XLR: diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index a03e93c4a946..539b6294b613 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -83,7 +83,7 @@ _mcount: PTR_S MCOUNT_RA_ADDRESS_REG, PT_R12(sp) #endif - move a0, ra /* arg1: self return address */ + PTR_SUBU a0, ra, 8 /* arg1: self address */ .globl ftrace_call ftrace_call: nop /* a placeholder for the call to a real tracing function */ diff --git a/arch/mips/kernel/relocate_kernel.S 
b/arch/mips/kernel/relocate_kernel.S index 43d2d78d3287..74bab9ddd0e1 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -26,6 +26,12 @@ process_entry: PTR_L s2, (s0) PTR_ADD s0, s0, SZREG + /* + * In case of a kdump/crash kernel, the indirection page is not + * populated as the kernel is directly copied to a reserved location + */ + beqz s2, done + /* destination page */ and s3, s2, 0x1 beq s3, zero, 1f diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c7f90519e58c..c538d6e01b7b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -552,6 +552,52 @@ static void __init arch_mem_addpart(phys_t mem, phys_t end, int type) add_memory_region(mem, size, type); } +#ifdef CONFIG_KEXEC +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_pfn - min_low_pfn; + return total << PAGE_SHIFT; +} + +static void __init mips_parse_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} + +static void __init request_crashkernel(struct resource *res) +{ + int ret; + + ret = request_resource(res, &crashk_res); + if (!ret) + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", + (unsigned long)((crashk_res.end - + crashk_res.start + 1) >> 20), + (unsigned long)(crashk_res.start >> 20)); +} +#else /* !defined(CONFIG_KEXEC) */ +static void __init mips_parse_crashkernel(void) +{ +} + +static void __init request_crashkernel(struct resource *res) +{ +} +#endif /* !defined(CONFIG_KEXEC) */ + static void __init arch_mem_init(char **cmdline_p) { extern void plat_mem_setup(void); @@ -608,6 +654,8 @@ static void __init arch_mem_init(char **cmdline_p) 
BOOTMEM_DEFAULT); } #endif + + mips_parse_crashkernel(); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) reserve_bootmem(crashk_res.start, @@ -620,52 +668,6 @@ static void __init arch_mem_init(char **cmdline_p) paging_init(); } -#ifdef CONFIG_KEXEC -static inline unsigned long long get_total_mem(void) -{ - unsigned long long total; - - total = max_pfn - min_low_pfn; - return total << PAGE_SHIFT; -} - -static void __init mips_parse_crashkernel(void) -{ - unsigned long long total_mem; - unsigned long long crash_size, crash_base; - int ret; - - total_mem = get_total_mem(); - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; - - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; -} - -static void __init request_crashkernel(struct resource *res) -{ - int ret; - - ret = request_resource(res, &crashk_res); - if (!ret) - pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", - (unsigned long)((crashk_res.end - - crashk_res.start + 1) >> 20), - (unsigned long)(crashk_res.start >> 20)); -} -#else /* !defined(CONFIG_KEXEC) */ -static void __init mips_parse_crashkernel(void) -{ -} - -static void __init request_crashkernel(struct resource *res) -{ -} -#endif /* !defined(CONFIG_KEXEC) */ - static void __init resource_init(void) { int i; @@ -678,11 +680,6 @@ static void __init resource_init(void) data_resource.start = __pa_symbol(&_etext); data_resource.end = __pa_symbol(&_edata) - 1; - /* - * Request address space for all standard RAM. 
- */ - mips_parse_crashkernel(); - for (i = 0; i < boot_mem_map.nr_map; i++) { struct resource *res; unsigned long start, end; diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index c2e5d74739b4..5969f1e9b62a 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -99,7 +99,9 @@ static void cmp_init_secondary(void) c->core = (read_c0_ebase() >> 1) & 0x1ff; #if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC) - c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE; + if (cpu_has_mipsmt) + c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & + TCBIND_CURVPE; #endif #ifdef CONFIG_MIPS_MT_SMTC c->tc_id = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT; @@ -177,9 +179,16 @@ void __init cmp_smp_setup(void) } if (cpu_has_mipsmt) { - unsigned int nvpe, mvpconf0 = read_c0_mvpconf0(); + unsigned int nvpe = 1; +#ifdef CONFIG_MIPS_MT_SMP + unsigned int mvpconf0 = read_c0_mvpconf0(); + + nvpe = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1; +#elif defined(CONFIG_MIPS_MT_SMTC) + unsigned int mvpconf0 = read_c0_mvpconf0(); nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; +#endif smp_num_siblings = nvpe; } pr_info("Detected %i available secondary CPU(s)\n", ncpu); diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 9d686bf97b0e..dcb8e5d3bb8a 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/smtc_ipi.h> #include <asm/time.h> @@ -121,6 +122,14 @@ void __init time_init(void) { plat_time_init(); - if (!mips_clockevent_init() || !cpu_has_mfc0_count_bug()) + /* + * The use of the R4k timer as a clock event takes precedence; + * if reading the Count register might interfere with the timer + * interrupt, then we don't use the timer as a clock source. 
+ * We may still use the timer as a clock source though if the + * timer interrupt isn't reliable; the interference doesn't + * matter then, because we don't use the interrupt. + */ + if (mips_clockevent_init() != 0 || !cpu_has_mfc0_count_bug()) init_mips_clocksource(); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index aec3408edd4b..524841f02803 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -39,6 +39,7 @@ #include <asm/break.h> #include <asm/cop2.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/fpu_emulator.h> @@ -622,7 +623,7 @@ static int simulate_rdhwr(struct pt_regs *regs, int rd, int rt) regs->regs[rt] = read_c0_count(); return 0; case 3: /* Count register resolution */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: regs->regs[rt] = 1; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 05826d20a792..3b46f7ce9ca7 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -179,5 +179,6 @@ SECTIONS *(.options) *(.pdr) *(.reginfo) + *(.eh_frame) } } diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index faf84c5f2629..59b2b3cd7885 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1368,7 +1368,7 @@ out_einval: } static DEVICE_ATTR_RW(ntcs); -static struct attribute vpe_attrs[] = { +static struct attribute *vpe_attrs[] = { &dev_attr_kill.attr, &dev_attr_ntcs.attr, NULL, diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index ff4894a833ee..8f1866d8124d 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -48,6 +48,7 @@ #define CPU0CC_CPUDIV 0x0001 /* Activation Status Register */ +#define ACTS_ASC0_ACT 0x00001000 #define ACTS_ASC1_ACT 0x00000800 #define ACTS_I2C_ACT 0x00004000 #define ACTS_P0 0x00010000 @@ -108,6 +109,7 @@ static void 
sysctl_deactivate(struct clk *clk) static int sysctl_clken(struct clk *clk) { sysctl_w32(clk->module, clk->bits, SYSCTL_CLKEN); + sysctl_w32(clk->module, clk->bits, SYSCTL_ACT); sysctl_wait(clk, clk->bits, SYSCTL_CLKS); return 0; } @@ -256,6 +258,7 @@ void __init ltq_soc_init(void) clkdev_add_sys("1e800400.pad", SYSCTL_SYS1, ACTS_PADCTRL1); clkdev_add_sys("1e800500.pad", SYSCTL_SYS1, ACTS_PADCTRL3); clkdev_add_sys("1e800600.pad", SYSCTL_SYS1, ACTS_PADCTRL4); - clkdev_add_sys("1e100C00.serial", SYSCTL_SYS1, ACTS_ASC1_ACT); + clkdev_add_sys("1e100b00.serial", SYSCTL_SYS1, ACTS_ASC1_ACT); + clkdev_add_sys("1e100c00.serial", SYSCTL_SYS1, ACTS_ASC0_ACT); clkdev_add_sys("1e200000.i2c", SYSCTL_SYS1, ACTS_I2C_ACT); } diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile index 7a13660d630d..087497d97357 100644 --- a/arch/mips/lantiq/xway/Makefile +++ b/arch/mips/lantiq/xway/Makefile @@ -1,3 +1,3 @@ -obj-y := prom.o sysctrl.o clk.o reset.o dma.o gptu.o +obj-y := prom.o sysctrl.o clk.o reset.o dma.o gptu.o dcdc.o obj-$(CONFIG_XRX200_PHY_FW) += xrx200_phy_fw.o diff --git a/arch/mips/lantiq/xway/dcdc.c b/arch/mips/lantiq/xway/dcdc.c new file mode 100644 index 000000000000..7688ac0f06d0 --- /dev/null +++ b/arch/mips/lantiq/xway/dcdc.c @@ -0,0 +1,63 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + * Copyright (C) 2012 John Crispin <blogic@openwrt.org> + * Copyright (C) 2010 Sameer Ahmad, Lantiq GmbH + */ + +#include <linux/ioport.h> +#include <linux/of_platform.h> + +#include <lantiq_soc.h> + +/* Bias and regulator Setup Register */ +#define DCDC_BIAS_VREG0 0xa +/* Bias and regulator Setup Register */ +#define DCDC_BIAS_VREG1 0xb + +#define dcdc_w8(x, y) ltq_w8((x), dcdc_membase + (y)) +#define dcdc_r8(x) ltq_r8(dcdc_membase + (x)) + +static void __iomem *dcdc_membase; + +static int dcdc_probe(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dcdc_membase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dcdc_membase)) + return PTR_ERR(dcdc_membase); + + dev_info(&pdev->dev, "Core Voltage : %d mV\n", + dcdc_r8(DCDC_BIAS_VREG1) * 8); + + return 0; +} + +static const struct of_device_id dcdc_match[] = { + { .compatible = "lantiq,dcdc-xrx200" }, + {}, +}; + +static struct platform_driver dcdc_driver = { + .probe = dcdc_probe, + .driver = { + .name = "dcdc-xrx200", + .owner = THIS_MODULE, + .of_match_table = dcdc_match, + }, +}; + +int __init dcdc_init(void) +{ + int ret = platform_driver_register(&dcdc_driver); + + if (ret) + pr_info("dcdc: Error registering platform driver\n"); + return ret; +} + +arch_initcall(dcdc_init); diff --git a/arch/mips/lasat/image/Makefile b/arch/mips/lasat/image/Makefile index dfb509d21d8e..fd32075679c6 100644 --- a/arch/mips/lasat/image/Makefile +++ b/arch/mips/lasat/image/Makefile @@ -13,13 +13,11 @@ endif MKLASATIMG = mklasatimg MKLASATIMG_ARCH = mq2,mqpro,sp100,sp200 KERNEL_IMAGE = vmlinux -KERNEL_START = $(shell $(NM) $(KERNEL_IMAGE) | grep " _text" | cut -f1 -d\ ) -KERNEL_ENTRY = $(shell $(NM) $(KERNEL_IMAGE) | grep kernel_entry | cut -f1 -d\ ) LDSCRIPT= -L$(srctree)/$(src) -Tromscript.normal -HEAD_DEFINES := -D_kernel_start=0x$(KERNEL_START) \ - -D_kernel_entry=0x$(KERNEL_ENTRY) \ +HEAD_DEFINES := -D_kernel_start=$(VMLINUX_LOAD_ADDRESS) \ + 
-D_kernel_entry=$(VMLINUX_ENTRY_ADDRESS) \ -D VERSION="\"$(Version)\"" \ -D TIMESTAMP=$(shell date +%s) diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index 4c57b3e5743f..9e4484ccbb03 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -3,8 +3,9 @@ # obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ - pci.o bonito-irq.o mem.o machtype.o platform.o + bonito-irq.o mem.o machtype.o platform.o obj-$(CONFIG_GPIOLIB) += gpio.o +obj-$(CONFIG_PCI) += pci.o # # Serial port support diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 46048d24328c..efe008846ed0 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -436,7 +436,6 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) break; default: return SIGILL; - break; } break; case mm_32f_74_op: /* c.cond.fmt */ @@ -451,12 +450,10 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) break; default: return SIGILL; - break; } break; default: return SIGILL; - break; } *insn_ptr = mips32_insn; @@ -491,7 +488,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; *contpc = regs->regs[insn.mm_i_format.rs]; return 1; - break; } } break; @@ -513,7 +509,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_bgezals_op: case mm_bgezal_op: regs->regs[31] = regs->cp0_epc + @@ -530,7 +525,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_blez_op: if ((long)regs->regs[insn.mm_i_format.rs] <= 0) *contpc = regs->cp0_epc + @@ -541,7 +535,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_bgtz_op: if ((long)regs->regs[insn.mm_i_format.rs] <= 
0) *contpc = regs->cp0_epc + @@ -552,7 +545,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_bc2f_op: case mm_bc1f_op: bc_false = 1; @@ -580,7 +572,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; } break; case mm_pool16c_op: @@ -593,7 +584,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_jr16_op: *contpc = regs->regs[insn.mm_i_format.rs]; return 1; - break; } break; case mm_beqz16_op: @@ -605,7 +595,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_bnez16_op: if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] != 0) *contpc = regs->cp0_epc + @@ -615,12 +604,10 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_b16_op: *contpc = regs->cp0_epc + dec_insn.pc_inc + (insn.mm_b0_format.simmediate << 1); return 1; - break; case mm_beq32_op: if (regs->regs[insn.mm_i_format.rs] == regs->regs[insn.mm_i_format.rt]) @@ -632,7 +619,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_bne32_op: if (regs->regs[insn.mm_i_format.rs] != regs->regs[insn.mm_i_format.rt]) @@ -643,7 +629,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case mm_jalx32_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; @@ -652,7 +637,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc <<= 28; *contpc |= (insn.j_format.target << 2); return 
1; - break; case mm_jals32_op: case mm_jal32_op: regs->regs[31] = regs->cp0_epc + @@ -665,7 +649,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, *contpc |= (insn.j_format.target << 1); set_isa16_mode(*contpc); return 1; - break; } return 0; } @@ -694,7 +677,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case jr_op: *contpc = regs->regs[insn.r_format.rs]; return 1; - break; } break; case bcond_op: @@ -716,7 +698,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case bgezal_op: case bgezall_op: regs->regs[31] = regs->cp0_epc + @@ -734,7 +715,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; } break; case jalx_op: @@ -752,7 +732,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Set microMIPS mode bit: XOR for jalx. 
*/ *contpc ^= bit; return 1; - break; case beq_op: case beql_op: if (regs->regs[insn.i_format.rs] == @@ -765,7 +744,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case bne_op: case bnel_op: if (regs->regs[insn.i_format.rs] != @@ -778,7 +756,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case blez_op: case blezl_op: if ((long)regs->regs[insn.i_format.rs] <= 0) @@ -790,7 +767,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case bgtz_op: case bgtzl_op: if ((long)regs->regs[insn.i_format.rs] > 0) @@ -802,7 +778,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; #ifdef CONFIG_CPU_CAVIUM_OCTEON case lwc2_op: /* This is bbit0 on Octeon */ if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0) @@ -856,7 +831,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; case 1: /* bc1t */ case 3: /* bc1tl */ if (fcr31 & (1 << bit)) @@ -868,7 +842,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - break; } } break; diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index a0bcdbb81d41..c8efdb5b6ee0 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -19,6 +19,7 @@ #include <asm/bootinfo.h> #include <asm/cacheops.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/r4kcache.h> @@ -186,9 +187,10 @@ static void probe_octeon(void) unsigned long dcache_size; unsigned int config1; struct cpuinfo_mips *c = &current_cpu_data; + int cputype
= current_cpu_type(); config1 = read_c0_config1(); - switch (c->cputype) { + switch (cputype) { case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: c->icache.linesz = 2 << ((config1 >> 19) & 7); @@ -199,7 +201,7 @@ static void probe_octeon(void) c->icache.sets * c->icache.ways * c->icache.linesz; c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; c->dcache.linesz = 128; - if (c->cputype == CPU_CAVIUM_OCTEON_PLUS) + if (cputype == CPU_CAVIUM_OCTEON_PLUS) c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ else c->dcache.sets = 1; /* CN3XXX has one Dcache set */ @@ -224,6 +226,20 @@ static void probe_octeon(void) c->options |= MIPS_CPU_PREFETCH; break; + case CPU_CAVIUM_OCTEON3: + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 39; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + default: panic("Unsupported Cavium Networks CPU type"); break; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index f749f687ee87..627883bc6d5f 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -12,6 +12,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/linkage.h> +#include <linux/preempt.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> @@ -24,6 +25,7 @@ #include <asm/cacheops.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -601,6 +603,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -621,6 +624,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) 
R4600_HIT_CACHEOP_WAR_IMPL; blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); __sync(); @@ -631,6 +635,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); + preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) r4k_blast_scache(); @@ -655,6 +660,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) R4600_HIT_CACHEOP_WAR_IMPL; blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); __sync(); @@ -780,20 +786,30 @@ static inline void rm7k_erratum31(void) static inline void alias_74k_erratum(struct cpuinfo_mips *c) { + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + /* * Early versions of the 74K do not update the cache tags on a * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG * aliases. In this case it is better to treat the cache as always * having aliases. 
*/ - if ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(2, 4, 0)) - c->dcache.flags |= MIPS_CACHE_VTAG; - if ((c->processor_id & 0xff) == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); - if (((c->processor_id & 0xff00) == PRID_IMP_1074K) && - ((c->processor_id & 0xff) <= PRID_REV_ENCODE_332(1, 1, 0))) { - c->dcache.flags |= MIPS_CACHE_VTAG; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + c->dcache.flags |= MIPS_CACHE_VTAG; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + c->dcache.flags |= MIPS_CACHE_VTAG; + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + } + break; + default: + BUG(); } } @@ -809,7 +825,7 @@ static void probe_pcache(void) unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -1025,7 +1041,8 @@ static void probe_pcache(void) * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); @@ -1045,7 +1062,7 @@ static void probe_pcache(void) * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. 
*/ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: case CPU_SB1: @@ -1065,7 +1082,7 @@ static void probe_pcache(void) case CPU_34K: case CPU_74K: case CPU_1004K: - if (c->cputype == CPU_74K) + if (current_cpu_type() == CPU_74K) alias_74k_erratum(c); if ((read_c0_config7() & (1 << 16))) { /* effectively physically indexed dcache, @@ -1078,7 +1095,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1207,7 +1224,7 @@ static void setup_scache(void) * processors don't have a S-cache that would be relevant to the * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1384,9 +1401,8 @@ static void r4k_cache_error_setup(void) { extern char __weak except_vec2_generic; extern char __weak except_vec2_sb1; - struct cpuinfo_mips *c = &current_cpu_data; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_SB1: case CPU_SB1A: set_uncached_handler(0x100, &except_vec2_sb1, 0x80); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index aaccf1c10699..f25a7e9f8cbc 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <asm/cache.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <dma-coherence.h> @@ -50,16 +51,20 @@ static inline struct page *dma_addr_to_page(struct device *dev, } /* + * The affected CPUs below in 'cpu_needs_post_dma_flush()' can + * speculatively fill random cachelines with stale data at any time, + * requiring an extra flush post-DMA. + * * Warning on the terminology - Linux calls an uncached area coherent; * MIPS terminology calls memory areas with hardware maintained coherency * coherent.
*/ - -static inline int cpu_is_noncoherent_r10000(struct device *dev) +static inline int cpu_needs_post_dma_flush(struct device *dev) { return !plat_device_is_coherent(dev) && - (current_cpu_type() == CPU_R10000 || - current_cpu_type() == CPU_R12000); + (boot_cpu_type() == CPU_R10000 || + boot_cpu_type() == CPU_R12000 || + boot_cpu_type() == CPU_BMIPS5000); } static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) @@ -230,7 +235,7 @@ static inline void __dma_sync(struct page *page, static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - if (cpu_is_noncoherent_r10000(dev)) + if (cpu_needs_post_dma_flush(dev)) __dma_sync(dma_addr_to_page(dev, dma_addr), dma_addr & ~PAGE_MASK, size, direction); @@ -284,7 +289,7 @@ static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, static void mips_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - if (cpu_is_noncoherent_r10000(dev)) + if (cpu_needs_post_dma_flush(dev)) __dma_sync(dma_addr_to_page(dev, dma_handle), dma_handle & ~PAGE_MASK, size, direction); } @@ -305,7 +310,7 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, /* Make sure that gcc doesn't leave the empty loop body. */ for (i = 0; i < nelems; i++, sg++) { - if (cpu_is_noncoherent_r10000(dev)) + if (cpu_needs_post_dma_flush(dev)) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); } diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 85df1cd8d446..becc42bb1849 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, const int field = sizeof(unsigned long) * 2; siginfo_t info; int fault; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? 
FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; #if 0 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), @@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -114,6 +115,7 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (cpu_has_rixi) { if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { @@ -241,6 +243,8 @@ out_of_memory: * (which will retry the fault, or kill us if we got oom-killed). */ up_read(&mm->mmap_sem); + if (!user_mode(regs)) + goto no_context; pagefault_out_of_memory(); return; diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index d4ea5c9c4a93..06ce17c2a905 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -12,6 +12,7 @@ #include <linux/swap.h> #include <linux/hugetlb.h> +#include <asm/cpu-features.h> #include <asm/pgtable.h> static inline pte_t gup_get_pte(pte_t *ptep) @@ -273,7 +274,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (end < start) + if (end < start || cpu_has_dc_aliases) goto slow_irqon; /* XXX: batch / limit 'nr' */ diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4e73f10a7519..e205ef598e97 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -254,6 +254,7 @@ void copy_from_user_page(struct vm_area_struct *vma, SetPageDcacheDirty(page); } } +EXPORT_SYMBOL_GPL(copy_from_user_page); void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c index 218c2109a55d..cbd81d17793a 100644 --- a/arch/mips/mm/page.c +++ b/arch/mips/mm/page.c @@ -18,6 +18,7 @@ #include <asm/bugs.h> #include 
<asm/cacheops.h> +#include <asm/cpu-type.h> #include <asm/inst.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 5d01392e3518..08d05aee8788 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/mm.h> +#include <asm/cpu-type.h> #include <asm/mipsregs.h> #include <asm/bcache.h> #include <asm/cacheops.h> @@ -71,7 +72,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) unsigned int tmp; /* Check the bypass bit (L2B) */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_34K: case CPU_74K: case CPU_1004K: diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S index 30a494db99c2..79bca3130bd1 100644 --- a/arch/mips/mm/tlb-funcs.S +++ b/arch/mips/mm/tlb-funcs.S @@ -16,10 +16,12 @@ #define FASTPATH_SIZE 128 +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT LEAF(tlbmiss_handler_setup_pgd) .space 16 * 4 END(tlbmiss_handler_setup_pgd) EXPORT(tlbmiss_handler_setup_pgd_end) +#endif LEAF(handle_tlbm) .space FASTPATH_SIZE * 4 diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 00b26a67a06d..bb3a5f643e97 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 556cb4815770..9bb3a9363b06 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -30,6 +30,7 @@ #include <linux/cache.h> #include <asm/cacheflush.h> +#include <asm/cpu-type.h> #include <asm/pgtable.h> #include <asm/war.h> #include <asm/uasm.h> @@ -85,6 +86,7 @@ static int use_bbit_insns(void) case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; @@ -95,6 +97,7 @@ static int use_lwx_insns(void) { switch (current_cpu_type()) { case 
CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: return 1; default: return 0; diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 53aad4a35375..a18af5fce67e 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -27,6 +27,7 @@ #include <linux/timex.h> #include <linux/mc146818rtc.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/mipsmtregs.h> #include <asm/hardirq.h> @@ -76,7 +77,7 @@ static void __init estimate_frequencies(void) #endif #if defined (CONFIG_KVM_GUEST) && defined (CONFIG_KVM_HOST_FREQ) - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); /* * XXXKYMA: hardwire the CPU frequency to Host Freq/4 @@ -169,7 +170,7 @@ unsigned int get_c0_compare_int(void) void __init plat_time_init(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int freq; estimate_frequencies(); diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index a43ea3cc0a3b..552d26c34386 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -7,6 +7,7 @@ */ #include <linux/init.h> +#include <asm/cpu.h> #include <asm/setup.h> #include <asm/time.h> #include <asm/irq.h> @@ -34,7 +35,7 @@ static void __iomem *status_reg = (void __iomem *)0xbf000410; */ static unsigned int __init estimate_cpu_frequency(void) { - unsigned int prid = read_c0_prid() & 0xffff00; + unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK); unsigned int tick = 0; unsigned int freq; unsigned int orig; diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig index 2447bf97d35a..852a4ee09954 100644 --- a/arch/mips/netlogic/Kconfig +++ b/arch/mips/netlogic/Kconfig @@ -19,6 +19,15 @@ config DT_XLP_SVP pointer to the kernel. 
The corresponding DTS file is at arch/mips/netlogic/dts/xlp_svp.dts +config DT_XLP_FVP + bool "Built-in device tree for XLP FVP boards" + default y + help + Add an FDT blob for XLP FVP board into the kernel. + This DTB will be used if the firmware does not pass in a DTB + pointer to the kernel. The corresponding DTS file is at + arch/mips/netlogic/dts/xlp_fvp.dts + config NLM_MULTINODE bool "Support for multi-chip boards" depends on NLM_XLP_BOARD diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 4e35d9c453e2..6f8feb9efcff 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -106,9 +106,7 @@ void nlm_early_init_secondary(int cpu) { change_c0_config(CONF_CM_CMASK, 0x3); #ifdef CONFIG_CPU_XLP - /* mmu init, once per core */ - if (cpu % NLM_THREADS_PER_CORE == 0) - xlp_mmu_init(); + xlp_mmu_init(); #endif write_c0_ebase(nlm_current_node()->ebase); } diff --git a/arch/mips/netlogic/common/time.c b/arch/mips/netlogic/common/time.c index 045a396c57ce..13391b8a6031 100644 --- a/arch/mips/netlogic/common/time.c +++ b/arch/mips/netlogic/common/time.c @@ -45,6 +45,7 @@ #if defined(CONFIG_CPU_XLP) #include <asm/netlogic/xlp-hal/iomap.h> #include <asm/netlogic/xlp-hal/xlp.h> +#include <asm/netlogic/xlp-hal/sys.h> #include <asm/netlogic/xlp-hal/pic.h> #elif defined(CONFIG_CPU_XLR) #include <asm/netlogic/xlr/iomap.h> @@ -91,7 +92,7 @@ static void nlm_init_pic_timer(void) csrc_pic.read = nlm_get_pic_timer; } csrc_pic.rating = 1000; - clocksource_register_hz(&csrc_pic, PIC_CLK_HZ); + clocksource_register_hz(&csrc_pic, pic_timer_freq()); } void __init plat_time_init(void) diff --git a/arch/mips/netlogic/dts/Makefile b/arch/mips/netlogic/dts/Makefile index aecb6fa9a9c3..0b9be5fd2e46 100644 --- a/arch/mips/netlogic/dts/Makefile +++ b/arch/mips/netlogic/dts/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_DT_XLP_EVP) := xlp_evp.dtb.o obj-$(CONFIG_DT_XLP_SVP) += xlp_svp.dtb.o +obj-$(CONFIG_DT_XLP_FVP) += xlp_fvp.dtb.o diff --git 
a/arch/mips/netlogic/dts/xlp_evp.dts b/arch/mips/netlogic/dts/xlp_evp.dts index 06407033678e..89ad04808c02 100644 --- a/arch/mips/netlogic/dts/xlp_evp.dts +++ b/arch/mips/netlogic/dts/xlp_evp.dts @@ -9,19 +9,12 @@ #address-cells = <2>; #size-cells = <2>; - memory { - device_type = "memory"; - reg = <0 0x00100000 0 0x0FF00000 // 255M at 1M - 0 0x20000000 0 0xa0000000 // 2560M at 512M - 0 0xe0000000 1 0x00000000>; - }; - soc { #address-cells = <2>; #size-cells = <1>; compatible = "simple-bus"; ranges = <0 0 0 0x18000000 0x04000000 // PCIe CFG - 1 0 0 0x16000000 0x01000000>; // GBU chipselects + 1 0 0 0x16000000 0x02000000>; // GBU chipselects serial0: serial@30000 { device_type = "serial"; diff --git a/arch/mips/netlogic/dts/xlp_fvp.dts b/arch/mips/netlogic/dts/xlp_fvp.dts new file mode 100644 index 000000000000..63e62b7bd758 --- /dev/null +++ b/arch/mips/netlogic/dts/xlp_fvp.dts @@ -0,0 +1,118 @@ +/* + * XLP2XX Device Tree Source for FVP boards + */ + +/dts-v1/; +/ { + model = "netlogic,XLP-FVP"; + compatible = "netlogic,xlp"; + #address-cells = <2>; + #size-cells = <2>; + + soc { + #address-cells = <2>; + #size-cells = <1>; + compatible = "simple-bus"; + ranges = <0 0 0 0x18000000 0x04000000 // PCIe CFG + 1 0 0 0x16000000 0x02000000>; // GBU chipselects + + serial0: serial@30000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x30100 0xa00>; + reg-shift = <2>; + reg-io-width = <4>; + clock-frequency = <133333333>; + interrupt-parent = <&pic>; + interrupts = <17>; + }; + serial1: serial@31000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x31100 0xa00>; + reg-shift = <2>; + reg-io-width = <4>; + clock-frequency = <133333333>; + interrupt-parent = <&pic>; + interrupts = <18>; + }; + i2c0: ocores@37100 { + compatible = "opencores,i2c-ocores"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0 0x37100 0x20>; + reg-shift = <2>; + reg-io-width = <4>; + clock-frequency = <32000000>; + interrupt-parent = <&pic>; + interrupts = <30>; 
+ }; + i2c1: ocores@37120 { + compatible = "opencores,i2c-ocores"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0 0x37120 0x20>; + reg-shift = <2>; + reg-io-width = <4>; + clock-frequency = <32000000>; + interrupt-parent = <&pic>; + interrupts = <31>; + + rtc@68 { + compatible = "dallas,ds1374"; + reg = <0x68>; + }; + + dtt@4c { + compatible = "national,lm90"; + reg = <0x4c>; + }; + }; + pic: pic@4000 { + compatible = "netlogic,xlp-pic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0 0x4000 0x200>; + interrupt-controller; + }; + + nor_flash@1,0 { + compatible = "cfi-flash"; + #address-cells = <1>; + #size-cells = <1>; + bank-width = <2>; + reg = <1 0 0x1000000>; + + partition@0 { + label = "x-loader"; + reg = <0x0 0x100000>; /* 1M */ + read-only; + }; + + partition@100000 { + label = "u-boot"; + reg = <0x100000 0x100000>; /* 1M */ + }; + + partition@200000 { + label = "kernel"; + reg = <0x200000 0x500000>; /* 5M */ + }; + + partition@700000 { + label = "rootfs"; + reg = <0x700000 0x800000>; /* 8M */ + }; + + partition@f00000 { + label = "env"; + reg = <0xf00000 0x100000>; /* 1M */ + read-only; + }; + }; + }; + + chosen { + bootargs = "console=ttyS0,115200 rdinit=/sbin/init"; + }; +}; diff --git a/arch/mips/netlogic/dts/xlp_svp.dts b/arch/mips/netlogic/dts/xlp_svp.dts index 9c5db102df53..1ebd00edaacc 100644 --- a/arch/mips/netlogic/dts/xlp_svp.dts +++ b/arch/mips/netlogic/dts/xlp_svp.dts @@ -9,19 +9,12 @@ #address-cells = <2>; #size-cells = <2>; - memory { - device_type = "memory"; - reg = <0 0x00100000 0 0x0FF00000 // 255M at 1M - 0 0x20000000 0 0xa0000000 // 2560M at 512M - 0 0xe0000000 0 0x40000000>; - }; - soc { #address-cells = <2>; #size-cells = <1>; compatible = "simple-bus"; ranges = <0 0 0 0x18000000 0x04000000 // PCIe CFG - 1 0 0 0x16000000 0x01000000>; // GBU chipselects + 1 0 0 0x16000000 0x02000000>; // GBU chipselects serial0: serial@30000 { device_type = "serial"; diff --git a/arch/mips/netlogic/xlp/Makefile 
b/arch/mips/netlogic/xlp/Makefile index 85ac4a892ced..ed9a93c04650 100644 --- a/arch/mips/netlogic/xlp/Makefile +++ b/arch/mips/netlogic/xlp/Makefile @@ -1,3 +1,4 @@ obj-y += setup.o nlm_hal.o cop2-ex.o dt.o obj-$(CONFIG_SMP) += wakeup.o obj-$(CONFIG_USB) += usb-init.o +obj-$(CONFIG_USB) += usb-init-xlp2.o diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c index a15cdbb8d0bd..88df445dda76 100644 --- a/arch/mips/netlogic/xlp/dt.c +++ b/arch/mips/netlogic/xlp/dt.c @@ -39,12 +39,18 @@ #include <linux/of_platform.h> #include <linux/of_device.h> -extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[], __dtb_start[]; +extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[], + __dtb_xlp_fvp_begin[], __dtb_start[]; void __init *xlp_dt_init(void *fdtp) { if (!fdtp) { switch (current_cpu_data.processor_id & 0xff00) { +#ifdef CONFIG_DT_XLP_FVP + case PRID_IMP_NETLOGIC_XLP2XX: + fdtp = __dtb_xlp_fvp_begin; + break; +#endif #ifdef CONFIG_DT_XLP_SVP case PRID_IMP_NETLOGIC_XLP3XX: fdtp = __dtb_xlp_svp_begin; diff --git a/arch/mips/netlogic/xlp/nlm_hal.c b/arch/mips/netlogic/xlp/nlm_hal.c index 87560e4db35f..56c50ba43c9b 100644 --- a/arch/mips/netlogic/xlp/nlm_hal.c +++ b/arch/mips/netlogic/xlp/nlm_hal.c @@ -44,6 +44,7 @@ #include <asm/netlogic/haldefs.h> #include <asm/netlogic/xlp-hal/iomap.h> #include <asm/netlogic/xlp-hal/xlp.h> +#include <asm/netlogic/xlp-hal/bridge.h> #include <asm/netlogic/xlp-hal/pic.h> #include <asm/netlogic/xlp-hal/sys.h> @@ -64,6 +65,7 @@ int nlm_irq_to_irt(int irq) uint64_t pcibase; int devoff, irt; + devoff = 0; switch (irq) { case PIC_UART_0_IRQ: devoff = XLP_IO_UART0_OFFSET(0); @@ -71,44 +73,68 @@ int nlm_irq_to_irt(int irq) case PIC_UART_1_IRQ: devoff = XLP_IO_UART1_OFFSET(0); break; - case PIC_EHCI_0_IRQ: - devoff = XLP_IO_USB_EHCI0_OFFSET(0); - break; - case PIC_EHCI_1_IRQ: - devoff = XLP_IO_USB_EHCI1_OFFSET(0); - break; - case PIC_OHCI_0_IRQ: - devoff = XLP_IO_USB_OHCI0_OFFSET(0); - break; - case PIC_OHCI_1_IRQ: - devoff = 
XLP_IO_USB_OHCI1_OFFSET(0); - break; - case PIC_OHCI_2_IRQ: - devoff = XLP_IO_USB_OHCI2_OFFSET(0); - break; - case PIC_OHCI_3_IRQ: - devoff = XLP_IO_USB_OHCI3_OFFSET(0); - break; case PIC_MMC_IRQ: devoff = XLP_IO_SD_OFFSET(0); break; - case PIC_I2C_0_IRQ: - devoff = XLP_IO_I2C0_OFFSET(0); - break; + case PIC_I2C_0_IRQ: /* I2C will be fixed up */ case PIC_I2C_1_IRQ: - devoff = XLP_IO_I2C1_OFFSET(0); + case PIC_I2C_2_IRQ: + case PIC_I2C_3_IRQ: + if (cpu_is_xlpii()) + devoff = XLP2XX_IO_I2C_OFFSET(0); + else + devoff = XLP_IO_I2C0_OFFSET(0); break; default: - devoff = 0; - break; + if (cpu_is_xlpii()) { + switch (irq) { + /* XLP2XX has three XHCI USB controller */ + case PIC_2XX_XHCI_0_IRQ: + devoff = XLP2XX_IO_USB_XHCI0_OFFSET(0); + break; + case PIC_2XX_XHCI_1_IRQ: + devoff = XLP2XX_IO_USB_XHCI1_OFFSET(0); + break; + case PIC_2XX_XHCI_2_IRQ: + devoff = XLP2XX_IO_USB_XHCI2_OFFSET(0); + break; + } + } else { + switch (irq) { + case PIC_EHCI_0_IRQ: + devoff = XLP_IO_USB_EHCI0_OFFSET(0); + break; + case PIC_EHCI_1_IRQ: + devoff = XLP_IO_USB_EHCI1_OFFSET(0); + break; + case PIC_OHCI_0_IRQ: + devoff = XLP_IO_USB_OHCI0_OFFSET(0); + break; + case PIC_OHCI_1_IRQ: + devoff = XLP_IO_USB_OHCI1_OFFSET(0); + break; + case PIC_OHCI_2_IRQ: + devoff = XLP_IO_USB_OHCI2_OFFSET(0); + break; + case PIC_OHCI_3_IRQ: + devoff = XLP_IO_USB_OHCI3_OFFSET(0); + break; + } + } } if (devoff != 0) { pcibase = nlm_pcicfg_base(devoff); irt = nlm_read_reg(pcibase, XLP_PCI_IRTINFO_REG) & 0xffff; - /* HW bug, I2C 1 irt entry is off by one */ - if (irq == PIC_I2C_1_IRQ) - irt = irt + 1; + /* HW weirdness, I2C IRT entry has to be fixed up */ + switch (irq) { + case PIC_I2C_1_IRQ: + irt = irt + 1; break; + case PIC_I2C_2_IRQ: + irt = irt + 2; break; + case PIC_I2C_3_IRQ: + irt = irt + 3; break; + } } else if (irq >= PIC_PCIE_LINK_0_IRQ && irq <= PIC_PCIE_LINK_3_IRQ) { /* HW bug, PCI IRT entries are bad on early silicon, fix */ irt = PIC_IRT_PCIE_LINK_INDEX(irq - PIC_PCIE_LINK_0_IRQ); @@ -126,19 +152,160 
@@ unsigned int nlm_get_core_frequency(int node, int core) sysbase = nlm_get_node(node)->sysbase; rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG); - dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE); - pll_divf = ((rstval >> 10) & 0x7f) + 1; - pll_divr = ((rstval >> 8) & 0x3) + 1; - ext_div = ((rstval >> 30) & 0x3) + 1; - dfs_div = ((dfsval >> (core * 4)) & 0xf) + 1; - - num = 800000000ULL * pll_divf; - denom = 3 * pll_divr * ext_div * dfs_div; + if (cpu_is_xlpii()) { + num = 1000000ULL * (400 * 3 + 100 * (rstval >> 26)); + denom = 3; + } else { + dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE); + pll_divf = ((rstval >> 10) & 0x7f) + 1; + pll_divr = ((rstval >> 8) & 0x3) + 1; + ext_div = ((rstval >> 30) & 0x3) + 1; + dfs_div = ((dfsval >> (core * 4)) & 0xf) + 1; + + num = 800000000ULL * pll_divf; + denom = 3 * pll_divr * ext_div * dfs_div; + } do_div(num, denom); return (unsigned int)num; } +/* Calculate Frequency to the PIC from PLL. + * freq_out = ( ref_freq/2 * (6 + ctrl2[7:0]) + ctrl2[20:8]/2^13 ) / + * ((2^ctrl0[7:5]) * Table(ctrl0[26:24])) + */ +static unsigned int nlm_2xx_get_pic_frequency(int node) +{ + u32 ctrl_val0, ctrl_val2, vco_post_div, pll_post_div; + u32 mdiv, fdiv, pll_out_freq_den, reg_select, ref_div, pic_div; + u64 ref_clk, sysbase, pll_out_freq_num, ref_clk_select; + + sysbase = nlm_get_node(node)->sysbase; + + /* Find ref_clk_base */ + ref_clk_select = + (nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG) >> 18) & 0x3; + switch (ref_clk_select) { + case 0: + ref_clk = 200000000ULL; + ref_div = 3; + break; + case 1: + ref_clk = 100000000ULL; + ref_div = 1; + break; + case 2: + ref_clk = 125000000ULL; + ref_div = 1; + break; + case 3: + ref_clk = 400000000ULL; + ref_div = 3; + break; + } + + /* Find the clock source PLL device for PIC */ + reg_select = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_SEL) >> 22) & 0x3; + switch (reg_select) { + case 0: + ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0); + ctrl_val2 = 
nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2); + break; + case 1: + ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(0)); + ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(0)); + break; + case 2: + ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(1)); + ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(1)); + break; + case 3: + ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(2)); + ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(2)); + break; + } + + vco_post_div = (ctrl_val0 >> 5) & 0x7; + pll_post_div = (ctrl_val0 >> 24) & 0x7; + mdiv = ctrl_val2 & 0xff; + fdiv = (ctrl_val2 >> 8) & 0xfff; + + /* Find PLL post divider value */ + switch (pll_post_div) { + case 1: + pll_post_div = 2; + break; + case 3: + pll_post_div = 4; + break; + case 7: + pll_post_div = 8; + break; + case 6: + pll_post_div = 16; + break; + case 0: + default: + pll_post_div = 1; + break; + } + + fdiv = fdiv/(1 << 13); + pll_out_freq_num = ((ref_clk >> 1) * (6 + mdiv)) + fdiv; + pll_out_freq_den = (1 << vco_post_div) * pll_post_div * 3; + + if (pll_out_freq_den > 0) + do_div(pll_out_freq_num, pll_out_freq_den); + + /* PIC post divider, which happens after PLL */ + pic_div = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_DIV) >> 22) & 0x3; + do_div(pll_out_freq_num, 1 << pic_div); + + return pll_out_freq_num; +} + +unsigned int nlm_get_pic_frequency(int node) +{ + if (cpu_is_xlpii()) + return nlm_2xx_get_pic_frequency(node); + else + return 133333333; +} + unsigned int nlm_get_cpu_frequency(void) { return nlm_get_core_frequency(0, 0); } + +/* + * Fills upto 8 pairs of entries containing the DRAM map of a node + * if n < 0, get dram map for all nodes + */ +int xlp_get_dram_map(int n, uint64_t *dram_map) +{ + uint64_t bridgebase, base, lim; + uint32_t val; + int i, node, rv; + + /* Look only at mapping on Node 0, we don't handle crazy configs */ + bridgebase = nlm_get_bridge_regbase(0); + rv = 0; + for (i = 0; i < 8; i++) { + val = nlm_read_bridge_reg(bridgebase, 
+ BRIDGE_DRAM_NODE_TRANSLN(i)); + node = (val >> 1) & 0x3; + if (n >= 0 && n != node) + continue; + val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_BAR(i)); + val = (val >> 12) & 0xfffff; + base = (uint64_t) val << 20; + val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_LIMIT(i)); + val = (val >> 12) & 0xfffff; + if (val == 0) /* BAR not used */ + continue; + lim = ((uint64_t)val + 1) << 20; + dram_map[rv] = base; + dram_map[rv + 1] = lim; + rv += 2; + } + return rv; +} diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index 7b638f7be491..76a7131e486e 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -73,6 +73,23 @@ static void nlm_fixup_mem(void) } } +static void __init xlp_init_mem_from_bars(void) +{ + uint64_t map[16]; + int i, n; + + n = xlp_get_dram_map(-1, map); /* -1: info for all nodes */ + for (i = 0; i < n; i += 2) { + /* exclude 0x1000_0000-0x2000_0000, u-boot device */ + if (map[i] <= 0x10000000 && map[i+1] > 0x10000000) + map[i+1] = 0x10000000; + if (map[i] > 0x10000000 && map[i] < 0x20000000) + map[i] = 0x20000000; + + add_memory_region(map[i], map[i+1] - map[i], BOOT_MEM_RAM); + } +} + void __init plat_mem_setup(void) { panic_timeout = 5; @@ -82,12 +99,23 @@ void __init plat_mem_setup(void) /* memory and bootargs from DT */ early_init_devtree(initial_boot_params); + + if (boot_mem_map.nr_map == 0) { + pr_info("Using DRAM BARs for memory map.\n"); + xlp_init_mem_from_bars(); + } + /* Calculate and setup wired entries for mapped kernel */ nlm_fixup_mem(); } const char *get_system_type(void) { - return "Netlogic XLP Series"; + switch (read_c0_prid() & 0xff00) { + case PRID_IMP_NETLOGIC_XLP2XX: + return "Broadcom XLPII Series"; + default: + return "Netlogic XLP Series"; + } } void __init prom_free_prom_memory(void) @@ -97,12 +125,20 @@ void __init prom_free_prom_memory(void) void xlp_mmu_init(void) { - /* enable extended TLB and Large Fixed TLB */ - write_c0_config6(read_c0_config6() | 0x24); 
- - /* set page mask of Fixed TLB in config7 */ - write_c0_config7(PM_DEFAULT_MASK >> - (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2))); + u32 conf4; + + if (cpu_is_xlpii()) { + /* XLPII series has extended pagesize in config 4 */ + conf4 = read_c0_config4() & ~0x1f00u; + write_c0_config4(conf4 | ((PAGE_SHIFT - 10) / 2 << 8)); + } else { + /* enable extended TLB and Large Fixed TLB */ + write_c0_config6(read_c0_config6() | 0x24); + + /* set page mask of extended Fixed TLB in config7 */ + write_c0_config7(PM_DEFAULT_MASK >> + (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2))); + } } void nlm_percpu_init(int hwcpuid) diff --git a/arch/mips/netlogic/xlp/usb-init-xlp2.c b/arch/mips/netlogic/xlp/usb-init-xlp2.c new file mode 100644 index 000000000000..36e9c22afc46 --- /dev/null +++ b/arch/mips/netlogic/xlp/usb-init-xlp2.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2003-2013 Broadcom Corporation + * All Rights Reserved + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the Broadcom + * license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/irq.h> + +#include <asm/netlogic/common.h> +#include <asm/netlogic/haldefs.h> +#include <asm/netlogic/xlp-hal/iomap.h> +#include <asm/netlogic/xlp-hal/xlp.h> + +#define XLPII_USB3_CTL_0 0xc0 +#define XLPII_VAUXRST BIT(0) +#define XLPII_VCCRST BIT(1) +#define XLPII_NUM2PORT 9 +#define XLPII_NUM3PORT 13 +#define XLPII_RTUNEREQ BIT(20) +#define XLPII_MS_CSYSREQ BIT(21) +#define XLPII_XS_CSYSREQ BIT(22) +#define XLPII_RETENABLEN BIT(23) +#define XLPII_TX2RX BIT(24) +#define XLPII_XHCIREV BIT(25) +#define XLPII_ECCDIS BIT(26) + +#define XLPII_USB3_INT_REG 0xc2 +#define XLPII_USB3_INT_MASK 0xc3 + +#define XLPII_USB_PHY_TEST 0xc6 +#define XLPII_PRESET BIT(0) +#define XLPII_ATERESET BIT(1) +#define XLPII_LOOPEN BIT(2) +#define XLPII_TESTPDHSP BIT(3) +#define XLPII_TESTPDSSP BIT(4) +#define XLPII_TESTBURNIN BIT(5) + +#define XLPII_USB_PHY_LOS_LV 0xc9 +#define XLPII_LOSLEV 0 +#define XLPII_LOSBIAS 5 +#define XLPII_SQRXTX 8 +#define XLPII_TXBOOST 11 +#define XLPII_RSLKSEL 16 +#define XLPII_FSEL 20 + +#define XLPII_USB_RFCLK_REG 0xcc +#define XLPII_VVLD 30 + +#define nlm_read_usb_reg(b, r) nlm_read_reg(b, r) +#define nlm_write_usb_reg(b, r, v) nlm_write_reg(b, r, v) + +#define nlm_xlpii_get_usb_pcibase(node, inst) \ + nlm_pcicfg_base(XLP2XX_IO_USB_OFFSET(node, 
inst)) +#define nlm_xlpii_get_usb_regbase(node, inst) \ + (nlm_xlpii_get_usb_pcibase(node, inst) + XLP_IO_PCI_HDRSZ) + +static void xlpii_usb_ack(struct irq_data *data) +{ + u64 port_addr; + + switch (data->irq) { + case PIC_2XX_XHCI_0_IRQ: + port_addr = nlm_xlpii_get_usb_regbase(0, 1); + break; + case PIC_2XX_XHCI_1_IRQ: + port_addr = nlm_xlpii_get_usb_regbase(0, 2); + break; + case PIC_2XX_XHCI_2_IRQ: + port_addr = nlm_xlpii_get_usb_regbase(0, 3); + break; + default: + pr_err("No matching USB irq!\n"); + return; + } + nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff); +} + +static void nlm_xlpii_usb_hw_reset(int node, int port) +{ + u64 port_addr, xhci_base, pci_base; + void __iomem *corebase; + u32 val; + + port_addr = nlm_xlpii_get_usb_regbase(node, port); + + /* Set frequency */ + val = nlm_read_usb_reg(port_addr, XLPII_USB_PHY_LOS_LV); + val &= ~(0x3f << XLPII_FSEL); + val |= (0x27 << XLPII_FSEL); + nlm_write_usb_reg(port_addr, XLPII_USB_PHY_LOS_LV, val); + + val = nlm_read_usb_reg(port_addr, XLPII_USB_RFCLK_REG); + val |= (1 << XLPII_VVLD); + nlm_write_usb_reg(port_addr, XLPII_USB_RFCLK_REG, val); + + /* PHY reset */ + val = nlm_read_usb_reg(port_addr, XLPII_USB_PHY_TEST); + val &= (XLPII_ATERESET | XLPII_LOOPEN | XLPII_TESTPDHSP + | XLPII_TESTPDSSP | XLPII_TESTBURNIN); + nlm_write_usb_reg(port_addr, XLPII_USB_PHY_TEST, val); + + /* Setup control register */ + val = XLPII_VAUXRST | XLPII_VCCRST | (1 << XLPII_NUM2PORT) + | (1 << XLPII_NUM3PORT) | XLPII_MS_CSYSREQ | XLPII_XS_CSYSREQ + | XLPII_RETENABLEN | XLPII_XHCIREV; + nlm_write_usb_reg(port_addr, XLPII_USB3_CTL_0, val); + + /* Enable interrupts */ + nlm_write_usb_reg(port_addr, XLPII_USB3_INT_MASK, 0x00000001); + + /* Clear all interrupts */ + nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff); + + udelay(2000); + + /* XHCI configuration at PCI mem */ + pci_base = nlm_xlpii_get_usb_pcibase(node, port); + xhci_base = nlm_read_usb_reg(pci_base, 0x4) & ~0xf; + corebase = 
ioremap(xhci_base, 0x10000); + if (!corebase) + return; + + writel(0x240002, corebase + 0xc2c0); + /* GCTL 0xc110 */ + val = readl(corebase + 0xc110); + val &= ~(0x3 << 12); + val |= (1 << 12); + writel(val, corebase + 0xc110); + udelay(100); + + /* PHYCFG 0xc200 */ + val = readl(corebase + 0xc200); + val &= ~(1 << 6); + writel(val, corebase + 0xc200); + udelay(100); + + /* PIPECTL 0xc2c0 */ + val = readl(corebase + 0xc2c0); + val &= ~(1 << 17); + writel(val, corebase + 0xc2c0); + + iounmap(corebase); +} + +static int __init nlm_platform_xlpii_usb_init(void) +{ + if (!cpu_is_xlpii()) + return 0; + + pr_info("Initializing 2XX USB Interface\n"); + nlm_xlpii_usb_hw_reset(0, 1); + nlm_xlpii_usb_hw_reset(0, 2); + nlm_xlpii_usb_hw_reset(0, 3); + nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_0_IRQ, xlpii_usb_ack); + nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_1_IRQ, xlpii_usb_ack); + nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_2_IRQ, xlpii_usb_ack); + + return 0; +} + +arch_initcall(nlm_platform_xlpii_usb_init); + +static u64 xlp_usb_dmamask = ~(u32)0; + +/* Fixup IRQ for USB devices on XLP the SoC PCIe bus */ +static void nlm_usb_fixup_final(struct pci_dev *dev) +{ + dev->dev.dma_mask = &xlp_usb_dmamask; + dev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + switch (dev->devfn) { + case 0x21: + dev->irq = PIC_2XX_XHCI_0_IRQ; + break; + case 0x22: + dev->irq = PIC_2XX_XHCI_1_IRQ; + break; + case 0x23: + dev->irq = PIC_2XX_XHCI_2_IRQ; + break; + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_NETLOGIC, PCI_DEVICE_ID_NLM_XHCI, + nlm_usb_fixup_final); diff --git a/arch/mips/netlogic/xlp/usb-init.c b/arch/mips/netlogic/xlp/usb-init.c index ef3897ef0dc7..f8117985f0f8 100644 --- a/arch/mips/netlogic/xlp/usb-init.c +++ b/arch/mips/netlogic/xlp/usb-init.c @@ -75,8 +75,7 @@ static void nlm_usb_intr_en(int node, int port) port_addr = nlm_get_usb_regbase(node, port); val = nlm_read_usb_reg(port_addr, USB_INT_EN); val = USB_CTRL_INTERRUPT_EN | USB_OHCI_INTERRUPT_EN | - USB_OHCI_INTERRUPT1_EN | 
USB_CTRL_INTERRUPT_EN | - USB_OHCI_INTERRUPT_EN | USB_OHCI_INTERRUPT2_EN; + USB_OHCI_INTERRUPT1_EN | USB_OHCI_INTERRUPT2_EN; nlm_write_usb_reg(port_addr, USB_INT_EN, val); } @@ -100,6 +99,9 @@ static void nlm_usb_hw_reset(int node, int port) static int __init nlm_platform_usb_init(void) { + if (cpu_is_xlpii()) + return 0; + pr_info("Initializing USB Interface\n"); nlm_usb_hw_reset(0, 0); nlm_usb_hw_reset(0, 3); diff --git a/arch/mips/netlogic/xlp/wakeup.c b/arch/mips/netlogic/xlp/wakeup.c index 0cce37cbffef..682d5638dc01 100644 --- a/arch/mips/netlogic/xlp/wakeup.c +++ b/arch/mips/netlogic/xlp/wakeup.c @@ -58,10 +58,12 @@ static int xlp_wakeup_core(uint64_t sysbase, int node, int core) coremask = (1 << core); - /* Enable CPU clock */ - value = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL); - value &= ~coremask; - nlm_write_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL, value); + /* Enable CPU clock in case of 8xx/3xx */ + if (!cpu_is_xlpii()) { + value = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL); + value &= ~coremask; + nlm_write_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL, value); + } /* Remove CPU Reset */ value = nlm_read_sys_reg(sysbase, SYS_CPU_RESET); diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index ed3bf0e3f309..c7622c6e5f67 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -36,6 +36,7 @@ #include <linux/irq.h> #include <linux/interrupt.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/netlogic/xlr/fmn.h> #include <asm/netlogic/xlr/xlr.h> @@ -187,7 +188,7 @@ void xlr_board_info_setup(void) int processor_id, num_core; num_core = hweight32(nlm_current_node()->coremask); - processor_id = read_c0_prid() & 0xff00; + processor_id = read_c0_prid() & PRID_IMP_MASK; setup_cpu_fmninfo(cpu, num_core); switch (processor_id) { diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 5e5424753b56..4d1736fc1955 100644 --- a/arch/mips/oprofile/common.c 
+++ b/arch/mips/oprofile/common.c @@ -12,6 +12,7 @@ #include <linux/oprofile.h> #include <linux/smp.h> #include <asm/cpu-info.h> +#include <asm/cpu-type.h> #include "op_impl.h" diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index c382042911dd..719e4557e22e 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80) += pci-bcm1480.o pci-bcm1480ht.o obj-$(CONFIG_SNI_RM) += fixup-sni.o ops-sni.o obj-$(CONFIG_LANTIQ) += fixup-lantiq.o obj-$(CONFIG_PCI_LANTIQ) += pci-lantiq.o ops-lantiq.o +obj-$(CONFIG_SOC_RT3883) += pci-rt3883.o obj-$(CONFIG_TANBAC_TB0219) += fixup-tb0219.o obj-$(CONFIG_TANBAC_TB0226) += fixup-tb0226.o obj-$(CONFIG_TANBAC_TB0287) += fixup-tb0287.o diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c index 44dd5aa2e36f..5ec2a7bae02c 100644 --- a/arch/mips/pci/pci-bcm1480.c +++ b/arch/mips/pci/pci-bcm1480.c @@ -39,6 +39,7 @@ #include <linux/mm.h> #include <linux/console.h> #include <linux/tty.h> +#include <linux/vt.h> #include <asm/sibyte/bcm1480_regs.h> #include <asm/sibyte/bcm1480_scd.h> diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 95c2ea815cac..59cccd95688b 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -586,15 +586,16 @@ static int __init octeon_pci_setup(void) else octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG; - /* PCI I/O and PCI MEM values */ - set_io_port_base(OCTEON_PCI_IOSPACE_BASE); - ioport_resource.start = 0; - ioport_resource.end = OCTEON_PCI_IOSPACE_SIZE - 1; if (!octeon_is_pci_host()) { pr_notice("Not in host mode, PCI Controller not initialized\n"); return 0; } + /* PCI I/O and PCI MEM values */ + set_io_port_base(OCTEON_PCI_IOSPACE_BASE); + ioport_resource.start = 0; + ioport_resource.end = OCTEON_PCI_IOSPACE_SIZE - 1; + pr_notice("%s Octeon big bar support\n", (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_BIG) ? 
"Enabling" : "Disabling"); diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c new file mode 100644 index 000000000000..95c9d41382e7 --- /dev/null +++ b/arch/mips/pci/pci-rt3883.c @@ -0,0 +1,636 @@ +/* + * Ralink RT3662/RT3883 SoC PCI support + * + * Copyright (C) 2011-2013 Gabor Juhos <juhosg@openwrt.org> + * + * Parts of this file are based on Ralink's 2.6.21 BSP + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_pci.h> +#include <linux/platform_device.h> + +#include <asm/mach-ralink/rt3883.h> +#include <asm/mach-ralink/ralink_regs.h> + +#define RT3883_MEMORY_BASE 0x00000000 +#define RT3883_MEMORY_SIZE 0x02000000 + +#define RT3883_PCI_REG_PCICFG 0x00 +#define RT3883_PCICFG_P2P_BR_DEVNUM_M 0xf +#define RT3883_PCICFG_P2P_BR_DEVNUM_S 16 +#define RT3883_PCICFG_PCIRST BIT(1) +#define RT3883_PCI_REG_PCIRAW 0x04 +#define RT3883_PCI_REG_PCIINT 0x08 +#define RT3883_PCI_REG_PCIENA 0x0c + +#define RT3883_PCI_REG_CFGADDR 0x20 +#define RT3883_PCI_REG_CFGDATA 0x24 +#define RT3883_PCI_REG_MEMBASE 0x28 +#define RT3883_PCI_REG_IOBASE 0x2c +#define RT3883_PCI_REG_ARBCTL 0x80 + +#define RT3883_PCI_REG_BASE(_x) (0x1000 + (_x) * 0x1000) +#define RT3883_PCI_REG_BAR0SETUP(_x) (RT3883_PCI_REG_BASE((_x)) + 0x10) +#define RT3883_PCI_REG_IMBASEBAR0(_x) (RT3883_PCI_REG_BASE((_x)) + 0x18) +#define RT3883_PCI_REG_ID(_x) (RT3883_PCI_REG_BASE((_x)) + 0x30) +#define RT3883_PCI_REG_CLASS(_x) (RT3883_PCI_REG_BASE((_x)) + 0x34) +#define RT3883_PCI_REG_SUBID(_x) (RT3883_PCI_REG_BASE((_x)) + 0x38) +#define RT3883_PCI_REG_STATUS(_x) (RT3883_PCI_REG_BASE((_x)) + 0x50) + +#define 
RT3883_PCI_MODE_NONE 0 +#define RT3883_PCI_MODE_PCI BIT(0) +#define RT3883_PCI_MODE_PCIE BIT(1) +#define RT3883_PCI_MODE_BOTH (RT3883_PCI_MODE_PCI | RT3883_PCI_MODE_PCIE) + +#define RT3883_PCI_IRQ_COUNT 32 + +#define RT3883_P2P_BR_DEVNUM 1 + +struct rt3883_pci_controller { + void __iomem *base; + spinlock_t lock; + + struct device_node *intc_of_node; + struct irq_domain *irq_domain; + + struct pci_controller pci_controller; + struct resource io_res; + struct resource mem_res; + + bool pcie_ready; +}; + +static inline struct rt3883_pci_controller * +pci_bus_to_rt3883_controller(struct pci_bus *bus) +{ + struct pci_controller *hose; + + hose = (struct pci_controller *) bus->sysdata; + return container_of(hose, struct rt3883_pci_controller, pci_controller); +} + +static inline u32 rt3883_pci_r32(struct rt3883_pci_controller *rpc, + unsigned reg) +{ + return ioread32(rpc->base + reg); +} + +static inline void rt3883_pci_w32(struct rt3883_pci_controller *rpc, + u32 val, unsigned reg) +{ + iowrite32(val, rpc->base + reg); +} + +static inline u32 rt3883_pci_get_cfgaddr(unsigned int bus, unsigned int slot, + unsigned int func, unsigned int where) +{ + return (bus << 16) | (slot << 11) | (func << 8) | (where & 0xfc) | + 0x80000000; +} + +static u32 rt3883_pci_read_cfg32(struct rt3883_pci_controller *rpc, + unsigned bus, unsigned slot, + unsigned func, unsigned reg) +{ + unsigned long flags; + u32 address; + u32 ret; + + address = rt3883_pci_get_cfgaddr(bus, slot, func, reg); + + spin_lock_irqsave(&rpc->lock, flags); + rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR); + ret = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA); + spin_unlock_irqrestore(&rpc->lock, flags); + + return ret; +} + +static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc, + unsigned bus, unsigned slot, + unsigned func, unsigned reg, u32 val) +{ + unsigned long flags; + u32 address; + + address = rt3883_pci_get_cfgaddr(bus, slot, func, reg); + + spin_lock_irqsave(&rpc->lock, flags); + 
rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR); + rt3883_pci_w32(rpc, val, RT3883_PCI_REG_CFGDATA); + spin_unlock_irqrestore(&rpc->lock, flags); +} + +static void rt3883_pci_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + struct rt3883_pci_controller *rpc; + u32 pending; + + rpc = irq_get_handler_data(irq); + + pending = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIINT) & + rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); + + if (!pending) { + spurious_interrupt(); + return; + } + + while (pending) { + unsigned bit = __ffs(pending); + + irq = irq_find_mapping(rpc->irq_domain, bit); + generic_handle_irq(irq); + + pending &= ~BIT(bit); + } +} + +static void rt3883_pci_irq_unmask(struct irq_data *d) +{ + struct rt3883_pci_controller *rpc; + u32 t; + + rpc = irq_data_get_irq_chip_data(d); + + t = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); + rt3883_pci_w32(rpc, t | BIT(d->hwirq), RT3883_PCI_REG_PCIENA); + /* flush write */ + rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); +} + +static void rt3883_pci_irq_mask(struct irq_data *d) +{ + struct rt3883_pci_controller *rpc; + u32 t; + + rpc = irq_data_get_irq_chip_data(d); + + t = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); + rt3883_pci_w32(rpc, t & ~BIT(d->hwirq), RT3883_PCI_REG_PCIENA); + /* flush write */ + rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); +} + +static struct irq_chip rt3883_pci_irq_chip = { + .name = "RT3883 PCI", + .irq_mask = rt3883_pci_irq_mask, + .irq_unmask = rt3883_pci_irq_unmask, + .irq_mask_ack = rt3883_pci_irq_mask, +}; + +static int rt3883_pci_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + irq_set_chip_and_handler(irq, &rt3883_pci_irq_chip, handle_level_irq); + irq_set_chip_data(irq, d->host_data); + + return 0; +} + +static const struct irq_domain_ops rt3883_pci_irq_domain_ops = { + .map = rt3883_pci_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static int rt3883_pci_irq_init(struct device *dev, + struct rt3883_pci_controller *rpc) +{ + int irq; + + irq = 
irq_of_parse_and_map(rpc->intc_of_node, 0); + if (irq == 0) { + dev_err(dev, "%s has no IRQ", + of_node_full_name(rpc->intc_of_node)); + return -EINVAL; + } + + /* disable all interrupts */ + rt3883_pci_w32(rpc, 0, RT3883_PCI_REG_PCIENA); + + rpc->irq_domain = + irq_domain_add_linear(rpc->intc_of_node, RT3883_PCI_IRQ_COUNT, + &rt3883_pci_irq_domain_ops, + rpc); + if (!rpc->irq_domain) { + dev_err(dev, "unable to add IRQ domain\n"); + return -ENODEV; + } + + irq_set_handler_data(irq, rpc); + irq_set_chained_handler(irq, rt3883_pci_irq_handler); + + return 0; +} + +static int rt3883_pci_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct rt3883_pci_controller *rpc; + unsigned long flags; + u32 address; + u32 data; + + rpc = pci_bus_to_rt3883_controller(bus); + + if (!rpc->pcie_ready && bus->number == 1) + return PCIBIOS_DEVICE_NOT_FOUND; + + address = rt3883_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where); + + spin_lock_irqsave(&rpc->lock, flags); + rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR); + data = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA); + spin_unlock_irqrestore(&rpc->lock, flags); + + switch (size) { + case 1: + *val = (data >> ((where & 3) << 3)) & 0xff; + break; + case 2: + *val = (data >> ((where & 3) << 3)) & 0xffff; + break; + case 4: + *val = data; + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int rt3883_pci_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct rt3883_pci_controller *rpc; + unsigned long flags; + u32 address; + u32 data; + + rpc = pci_bus_to_rt3883_controller(bus); + + if (!rpc->pcie_ready && bus->number == 1) + return PCIBIOS_DEVICE_NOT_FOUND; + + address = rt3883_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where); + + spin_lock_irqsave(&rpc->lock, flags); + rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR); + data = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA); + + switch (size) 
{ + case 1: + data = (data & ~(0xff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + break; + case 2: + data = (data & ~(0xffff << ((where & 3) << 3))) | + (val << ((where & 3) << 3)); + break; + case 4: + data = val; + break; + } + + rt3883_pci_w32(rpc, data, RT3883_PCI_REG_CFGDATA); + spin_unlock_irqrestore(&rpc->lock, flags); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops rt3883_pci_ops = { + .read = rt3883_pci_config_read, + .write = rt3883_pci_config_write, +}; + +static void rt3883_pci_preinit(struct rt3883_pci_controller *rpc, unsigned mode) +{ + u32 syscfg1; + u32 rstctrl; + u32 clkcfg1; + u32 t; + + rstctrl = rt_sysc_r32(RT3883_SYSC_REG_RSTCTRL); + syscfg1 = rt_sysc_r32(RT3883_SYSC_REG_SYSCFG1); + clkcfg1 = rt_sysc_r32(RT3883_SYSC_REG_CLKCFG1); + + if (mode & RT3883_PCI_MODE_PCIE) { + rstctrl |= RT3883_RSTCTRL_PCIE; + rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL); + + /* setup PCI PAD drive mode */ + syscfg1 &= ~(0x30); + syscfg1 |= (2 << 4); + rt_sysc_w32(syscfg1, RT3883_SYSC_REG_SYSCFG1); + + t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0); + t &= ~BIT(31); + rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0); + + t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN1); + t &= 0x80ffffff; + rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN1); + + t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN1); + t |= 0xa << 24; + rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN1); + + t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0); + t |= BIT(31); + rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0); + + msleep(50); + + rstctrl &= ~RT3883_RSTCTRL_PCIE; + rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL); + } + + syscfg1 |= (RT3883_SYSCFG1_PCIE_RC_MODE | RT3883_SYSCFG1_PCI_HOST_MODE); + + clkcfg1 &= ~(RT3883_CLKCFG1_PCI_CLK_EN | RT3883_CLKCFG1_PCIE_CLK_EN); + + if (mode & RT3883_PCI_MODE_PCI) { + clkcfg1 |= RT3883_CLKCFG1_PCI_CLK_EN; + rstctrl &= ~RT3883_RSTCTRL_PCI; + } + + if (mode & RT3883_PCI_MODE_PCIE) { + clkcfg1 |= RT3883_CLKCFG1_PCIE_CLK_EN; + rstctrl &= ~RT3883_RSTCTRL_PCIE; 
+ } + + rt_sysc_w32(syscfg1, RT3883_SYSC_REG_SYSCFG1); + rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL); + rt_sysc_w32(clkcfg1, RT3883_SYSC_REG_CLKCFG1); + + msleep(500); + + /* + * setup the device number of the P2P bridge + * and de-assert the reset line + */ + t = (RT3883_P2P_BR_DEVNUM << RT3883_PCICFG_P2P_BR_DEVNUM_S); + rt3883_pci_w32(rpc, t, RT3883_PCI_REG_PCICFG); + + /* flush write */ + rt3883_pci_r32(rpc, RT3883_PCI_REG_PCICFG); + msleep(500); + + if (mode & RT3883_PCI_MODE_PCIE) { + msleep(500); + + t = rt3883_pci_r32(rpc, RT3883_PCI_REG_STATUS(1)); + + rpc->pcie_ready = t & BIT(0); + + if (!rpc->pcie_ready) { + /* reset the PCIe block */ + t = rt_sysc_r32(RT3883_SYSC_REG_RSTCTRL); + t |= RT3883_RSTCTRL_PCIE; + rt_sysc_w32(t, RT3883_SYSC_REG_RSTCTRL); + t &= ~RT3883_RSTCTRL_PCIE; + rt_sysc_w32(t, RT3883_SYSC_REG_RSTCTRL); + + /* turn off PCIe clock */ + t = rt_sysc_r32(RT3883_SYSC_REG_CLKCFG1); + t &= ~RT3883_CLKCFG1_PCIE_CLK_EN; + rt_sysc_w32(t, RT3883_SYSC_REG_CLKCFG1); + + t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0); + t &= ~0xf000c080; + rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0); + } + } + + /* enable PCI arbiter */ + rt3883_pci_w32(rpc, 0x79, RT3883_PCI_REG_ARBCTL); +} + +static int rt3883_pci_probe(struct platform_device *pdev) +{ + struct rt3883_pci_controller *rpc; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct resource *res; + struct device_node *child; + u32 val; + int err; + int mode; + + rpc = devm_kzalloc(dev, sizeof(*rpc), GFP_KERNEL); + if (!rpc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + rpc->base = devm_ioremap_resource(dev, res); + if (IS_ERR(rpc->base)) + return PTR_ERR(rpc->base); + + /* find the interrupt controller child node */ + for_each_child_of_node(np, child) { + if (of_get_property(child, "interrupt-controller", NULL) && + of_node_get(child)) { + rpc->intc_of_node = child; + break; + } + } + + if (!rpc->intc_of_node) { 
+ dev_err(dev, "%s has no %s child node", + of_node_full_name(rpc->intc_of_node), + "interrupt controller"); + return -EINVAL; + } + + /* find the PCI host bridge child node */ + for_each_child_of_node(np, child) { + if (child->type && + of_node_cmp(child->type, "pci") == 0 && + of_node_get(child)) { + rpc->pci_controller.of_node = child; + break; + } + } + + if (!rpc->pci_controller.of_node) { + dev_err(dev, "%s has no %s child node", + of_node_full_name(rpc->intc_of_node), + "PCI host bridge"); + err = -EINVAL; + goto err_put_intc_node; + } + + mode = RT3883_PCI_MODE_NONE; + for_each_available_child_of_node(rpc->pci_controller.of_node, child) { + int devfn; + + if (!child->type || + of_node_cmp(child->type, "pci") != 0) + continue; + + devfn = of_pci_get_devfn(child); + if (devfn < 0) + continue; + + switch (PCI_SLOT(devfn)) { + case 1: + mode |= RT3883_PCI_MODE_PCIE; + break; + + case 17: + case 18: + mode |= RT3883_PCI_MODE_PCI; + break; + } + } + + if (mode == RT3883_PCI_MODE_NONE) { + dev_err(dev, "unable to determine PCI mode\n"); + err = -EINVAL; + goto err_put_hb_node; + } + + dev_info(dev, "mode:%s%s\n", + (mode & RT3883_PCI_MODE_PCI) ? " PCI" : "", + (mode & RT3883_PCI_MODE_PCIE) ? 
" PCIe" : ""); + + rt3883_pci_preinit(rpc, mode); + + rpc->pci_controller.pci_ops = &rt3883_pci_ops; + rpc->pci_controller.io_resource = &rpc->io_res; + rpc->pci_controller.mem_resource = &rpc->mem_res; + + /* Load PCI I/O and memory resources from DT */ + pci_load_of_ranges(&rpc->pci_controller, + rpc->pci_controller.of_node); + + rt3883_pci_w32(rpc, rpc->mem_res.start, RT3883_PCI_REG_MEMBASE); + rt3883_pci_w32(rpc, rpc->io_res.start, RT3883_PCI_REG_IOBASE); + + ioport_resource.start = rpc->io_res.start; + ioport_resource.end = rpc->io_res.end; + + /* PCI */ + rt3883_pci_w32(rpc, 0x03ff0000, RT3883_PCI_REG_BAR0SETUP(0)); + rt3883_pci_w32(rpc, RT3883_MEMORY_BASE, RT3883_PCI_REG_IMBASEBAR0(0)); + rt3883_pci_w32(rpc, 0x08021814, RT3883_PCI_REG_ID(0)); + rt3883_pci_w32(rpc, 0x00800001, RT3883_PCI_REG_CLASS(0)); + rt3883_pci_w32(rpc, 0x28801814, RT3883_PCI_REG_SUBID(0)); + + /* PCIe */ + rt3883_pci_w32(rpc, 0x03ff0000, RT3883_PCI_REG_BAR0SETUP(1)); + rt3883_pci_w32(rpc, RT3883_MEMORY_BASE, RT3883_PCI_REG_IMBASEBAR0(1)); + rt3883_pci_w32(rpc, 0x08021814, RT3883_PCI_REG_ID(1)); + rt3883_pci_w32(rpc, 0x06040001, RT3883_PCI_REG_CLASS(1)); + rt3883_pci_w32(rpc, 0x28801814, RT3883_PCI_REG_SUBID(1)); + + err = rt3883_pci_irq_init(dev, rpc); + if (err) + goto err_put_hb_node; + + /* PCIe */ + val = rt3883_pci_read_cfg32(rpc, 0, 0x01, 0, PCI_COMMAND); + val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + rt3883_pci_write_cfg32(rpc, 0, 0x01, 0, PCI_COMMAND, val); + + /* PCI */ + val = rt3883_pci_read_cfg32(rpc, 0, 0x00, 0, PCI_COMMAND); + val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + rt3883_pci_write_cfg32(rpc, 0, 0x00, 0, PCI_COMMAND, val); + + if (mode == RT3883_PCI_MODE_PCIE) { + rt3883_pci_w32(rpc, 0x03ff0001, RT3883_PCI_REG_BAR0SETUP(0)); + rt3883_pci_w32(rpc, 0x03ff0001, RT3883_PCI_REG_BAR0SETUP(1)); + + rt3883_pci_write_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0, + PCI_BASE_ADDRESS_0, + RT3883_MEMORY_BASE); + /* flush write */ + 
rt3883_pci_read_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0, + PCI_BASE_ADDRESS_0); + } else { + rt3883_pci_write_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0, + PCI_IO_BASE, 0x00000101); + } + + register_pci_controller(&rpc->pci_controller); + + return 0; + +err_put_hb_node: + of_node_put(rpc->pci_controller.of_node); +err_put_intc_node: + of_node_put(rpc->intc_of_node); + return err; +} + +int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + struct of_irq dev_irq; + int err; + int irq; + + err = of_irq_map_pci(dev, &dev_irq); + if (err) { + pr_err("pci %s: unable to get irq map, err=%d\n", + pci_name((struct pci_dev *) dev), err); + return 0; + } + + irq = irq_create_of_mapping(dev_irq.controller, + dev_irq.specifier, + dev_irq.size); + + if (irq == 0) + pr_crit("pci %s: no irq found for pin %u\n", + pci_name((struct pci_dev *) dev), pin); + else + pr_info("pci %s: using irq %d for pin %u\n", + pci_name((struct pci_dev *) dev), irq, pin); + + return irq; +} + +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + return 0; +} + +static const struct of_device_id rt3883_pci_ids[] = { + { .compatible = "ralink,rt3883-pci" }, + {}, +}; +MODULE_DEVICE_TABLE(of, rt3883_pci_ids); + +static struct platform_driver rt3883_pci_driver = { + .probe = rt3883_pci_probe, + .driver = { + .name = "rt3883-pci", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(rt3883_pci_ids), + }, +}; + +static int __init rt3883_pci_init(void) +{ + return platform_driver_register(&rt3883_pci_driver); +} + +postcore_initcall(rt3883_pci_init); diff --git a/arch/mips/powertv/Kconfig b/arch/mips/powertv/Kconfig index 1a1b03ea6398..dd91fbacbcba 100644 --- a/arch/mips/powertv/Kconfig +++ b/arch/mips/powertv/Kconfig @@ -1,14 +1,7 @@ -config BOOTLOADER_DRIVER - bool "PowerTV Bootloader Driver Support" - default n - depends on POWERTV - help - Use this option if you want to load bootloader driver. 
- config BOOTLOADER_FAMILY string "POWERTV Bootloader Family string" default "85" - depends on POWERTV && !BOOTLOADER_DRIVER + depends on POWERTV help This value should be specified when the bootloader driver is disabled and must be exactly two characters long. Families supported are: diff --git a/arch/mips/powertv/asic/asic_devices.c b/arch/mips/powertv/asic/asic_devices.c index 0238af1ba503..8380605d597d 100644 --- a/arch/mips/powertv/asic/asic_devices.c +++ b/arch/mips/powertv/asic/asic_devices.c @@ -147,20 +147,10 @@ static __init noinline void platform_set_family(void) if (check_forcefamily(forced_family) == 0) bootldr_family = BOOTLDRFAMILY(forced_family[0], forced_family[1]); - else { - -#ifdef CONFIG_BOOTLOADER_DRIVER - bootldr_family = (unsigned short) kbldr_GetSWFamily(); -#else -#if defined(CONFIG_BOOTLOADER_FAMILY) + else bootldr_family = (unsigned short) BOOTLDRFAMILY( CONFIG_BOOTLOADER_FAMILY[0], CONFIG_BOOTLOADER_FAMILY[1]); -#else -#error "Unknown Bootloader Family" -#endif -#endif - } pr_info("Bootloader Family = 0x%04X\n", bootldr_family); diff --git a/arch/mips/powertv/init.c b/arch/mips/powertv/init.c index a01baff52cae..498926377e51 100644 --- a/arch/mips/powertv/init.c +++ b/arch/mips/powertv/init.c @@ -87,8 +87,4 @@ void __init prom_init(void) configure_platform(); prom_meminit(); - -#ifndef CONFIG_BOOTLOADER_DRIVER - pr_info("\nBootloader driver isn't loaded...\n"); -#endif } diff --git a/arch/mips/powertv/reset.c b/arch/mips/powertv/reset.c index 0007652cb774..11c32fbf2784 100644 --- a/arch/mips/powertv/reset.c +++ b/arch/mips/powertv/reset.c @@ -21,24 +21,12 @@ #include <linux/io.h> #include <asm/reboot.h> /* Not included by linux/reboot.h */ -#ifdef CONFIG_BOOTLOADER_DRIVER -#include <asm/mach-powertv/kbldr.h> -#endif - #include <asm/mach-powertv/asic_regs.h> #include "reset.h" static void mips_machine_restart(char *command) { -#ifdef CONFIG_BOOTLOADER_DRIVER - /* - * Call the bootloader's reset function to ensure - * that persistent data 
is flushed before hard reset - */ - kbldr_SetCauseAndReset(); -#else writel(0x1, asic_reg_addr(watchdog)); -#endif } void mips_reboot_setup(void) diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index 026e823d871d..424f03496d14 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -1,5 +1,12 @@ if RALINK +config CLKEVT_RT3352 + bool + depends on SOC_RT305X || SOC_MT7620 + default y + select CLKSRC_OF + select CLKSRC_MMIO + choice prompt "Ralink SoC selection" default SOC_RT305X @@ -19,9 +26,12 @@ choice bool "RT3883" select USB_ARCH_HAS_OHCI select USB_ARCH_HAS_EHCI + select HW_HAS_PCI config SOC_MT7620 bool "MT7620" + select USB_ARCH_HAS_OHCI + select USB_ARCH_HAS_EHCI endchoice diff --git a/arch/mips/ralink/Makefile b/arch/mips/ralink/Makefile index 38cf1a880aaa..98ae349827be 100644 --- a/arch/mips/ralink/Makefile +++ b/arch/mips/ralink/Makefile @@ -6,7 +6,9 @@ # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org> # Copyright (C) 2013 John Crispin <blogic@openwrt.org> -obj-y := prom.o of.o reset.o clk.o irq.o +obj-y := prom.o of.o reset.o clk.o irq.o timer.o + +obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o obj-$(CONFIG_SOC_RT288X) += rt288x.o obj-$(CONFIG_SOC_RT305X) += rt305x.o diff --git a/arch/mips/ralink/Platform b/arch/mips/ralink/Platform index cda4b6645c50..6d9c8c499f98 100644 --- a/arch/mips/ralink/Platform +++ b/arch/mips/ralink/Platform @@ -26,3 +26,4 @@ cflags-$(CONFIG_SOC_RT3883) += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt # Ralink MT7620 # load-$(CONFIG_SOC_MT7620) += 0xffffffff80000000 +cflags-$(CONFIG_SOC_MT7620) += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620 diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c new file mode 100644 index 000000000000..cc17566d1934 --- /dev/null +++ b/arch/mips/ralink/cevt-rt3352.c @@ -0,0 +1,145 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2013 by John Crispin <blogic@openwrt.org> + */ + +#include <linux/clockchips.h> +#include <linux/clocksource.h> +#include <linux/interrupt.h> +#include <linux/reset.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> + +#include <asm/mach-ralink/ralink_regs.h> + +#define SYSTICK_FREQ (50 * 1000) + +#define SYSTICK_CONFIG 0x00 +#define SYSTICK_COMPARE 0x04 +#define SYSTICK_COUNT 0x08 + +/* route systick irq to mips irq 7 instead of the r4k-timer */ +#define CFG_EXT_STK_EN 0x2 +/* enable the counter */ +#define CFG_CNT_EN 0x1 + +struct systick_device { + void __iomem *membase; + struct clock_event_device dev; + int irq_requested; + int freq_scale; +}; + +static void systick_set_clock_mode(enum clock_event_mode mode, + struct clock_event_device *evt); + +static int systick_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct systick_device *sdev; + u32 count; + + sdev = container_of(evt, struct systick_device, dev); + count = ioread32(sdev->membase + SYSTICK_COUNT); + count = (count + delta) % SYSTICK_FREQ; + iowrite32(count + delta, sdev->membase + SYSTICK_COMPARE); + + return 0; +} + +static void systick_event_handler(struct clock_event_device *dev) +{ + /* noting to do here */ +} + +static irqreturn_t systick_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *dev = (struct clock_event_device *) dev_id; + + dev->event_handler(dev); + + return IRQ_HANDLED; +} + +static struct systick_device systick = { + .dev = { + /* + * cevt-r4k uses 300, make sure systick + * gets used if available + */ + .rating = 310, + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = systick_next_event, + .set_mode = systick_set_clock_mode, + .event_handler = systick_event_handler, + }, +}; + +static struct irqaction systick_irqaction = { + .handler = 
systick_interrupt, + .flags = IRQF_PERCPU | IRQF_TIMER, + .dev_id = &systick.dev, +}; + +static void systick_set_clock_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct systick_device *sdev; + + sdev = container_of(evt, struct systick_device, dev); + + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + if (!sdev->irq_requested) + setup_irq(systick.dev.irq, &systick_irqaction); + sdev->irq_requested = 1; + iowrite32(CFG_EXT_STK_EN | CFG_CNT_EN, + systick.membase + SYSTICK_CONFIG); + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + if (sdev->irq_requested) + free_irq(systick.dev.irq, &systick_irqaction); + sdev->irq_requested = 0; + iowrite32(0, systick.membase + SYSTICK_CONFIG); + break; + + default: + pr_err("%s: Unhandeled mips clock_mode\n", systick.dev.name); + break; + } +} + +static void __init ralink_systick_init(struct device_node *np) +{ + systick.membase = of_iomap(np, 0); + if (!systick.membase) + return; + + systick_irqaction.name = np->name; + systick.dev.name = np->name; + clockevents_calc_mult_shift(&systick.dev, SYSTICK_FREQ, 60); + systick.dev.max_delta_ns = clockevent_delta2ns(0x7fff, &systick.dev); + systick.dev.min_delta_ns = clockevent_delta2ns(0x3, &systick.dev); + systick.dev.irq = irq_of_parse_and_map(np, 0); + if (!systick.dev.irq) { + pr_err("%s: request_irq failed", np->name); + return; + } + + clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, + SYSTICK_FREQ, 301, 16, clocksource_mmio_readl_up); + + clockevents_register_device(&systick.dev); + + pr_info("%s: runing - mult: %d, shift: %d\n", + np->name, systick.dev.mult, systick.dev.shift); +} + +CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c index 8dfa22ff300b..bba0cdfd83bc 100644 --- a/arch/mips/ralink/clk.c +++ b/arch/mips/ralink/clk.c @@ -69,4 +69,5 @@ void __init plat_time_init(void) pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000); 
mips_hpt_frequency = clk_get_rate(clk) / 2; clk_put(clk); + clocksource_of_init(); } diff --git a/arch/mips/ralink/common.h b/arch/mips/ralink/common.h index 83144c3fc5ac..42dfd6100a2d 100644 --- a/arch/mips/ralink/common.h +++ b/arch/mips/ralink/common.h @@ -46,6 +46,8 @@ extern void ralink_of_remap(void); extern void ralink_clk_init(void); extern void ralink_clk_add(const char *dev, unsigned long rate); +extern void ralink_rst_init(void); + extern void prom_soc_init(struct ralink_soc_info *soc_info); __iomem void *plat_of_remap_node(const char *node); diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 0018b1a661f6..d217509e5300 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -23,9 +23,6 @@ /* does the board have sdram or ddram */ static int dram_type; -/* the pll dividers */ -static u32 mt7620_clk_divider[] = { 2, 3, 4, 8 }; - static struct ralink_pinmux_grp mode_mux[] = { { .name = "i2c", @@ -140,34 +137,189 @@ struct ralink_pinmux rt_gpio_pinmux = { .uart_mask = MT7620_GPIO_MODE_UART0_MASK, }; -void __init ralink_clk_init(void) +static __init u32 +mt7620_calc_rate(u32 ref_rate, u32 mul, u32 div) { - unsigned long cpu_rate, sys_rate; - u32 c0 = rt_sysc_r32(SYSC_REG_CPLL_CONFIG0); - u32 c1 = rt_sysc_r32(SYSC_REG_CPLL_CONFIG1); - u32 swconfig = (c0 >> CPLL_SW_CONFIG_SHIFT) & CPLL_SW_CONFIG_MASK; - u32 cpu_clk = (c1 >> CPLL_CPU_CLK_SHIFT) & CPLL_CPU_CLK_MASK; - - if (cpu_clk) { - cpu_rate = 480000000; - } else if (!swconfig) { - cpu_rate = 600000000; - } else { - u32 m = (c0 >> CPLL_MULT_RATIO_SHIFT) & CPLL_MULT_RATIO; - u32 d = (c0 >> CPLL_DIV_RATIO_SHIFT) & CPLL_DIV_RATIO; + u64 t; - cpu_rate = ((40 * (m + 24)) / mt7620_clk_divider[d]) * 1000000; - } + t = ref_rate; + t *= mul; + do_div(t, div); + + return t; +} + +#define MHZ(x) ((x) * 1000 * 1000) + +static __init unsigned long +mt7620_get_xtal_rate(void) +{ + u32 reg; + + reg = rt_sysc_r32(SYSC_REG_SYSTEM_CONFIG0); + if (reg & SYSCFG0_XTAL_FREQ_SEL) + return 
MHZ(40); + + return MHZ(20); +} + +static __init unsigned long +mt7620_get_periph_rate(unsigned long xtal_rate) +{ + u32 reg; + + reg = rt_sysc_r32(SYSC_REG_CLKCFG0); + if (reg & CLKCFG0_PERI_CLK_SEL) + return xtal_rate; + + return MHZ(40); +} + +static const u32 mt7620_clk_divider[] __initconst = { 2, 3, 4, 8 }; + +static __init unsigned long +mt7620_get_cpu_pll_rate(unsigned long xtal_rate) +{ + u32 reg; + u32 mul; + u32 div; + + reg = rt_sysc_r32(SYSC_REG_CPLL_CONFIG0); + if (reg & CPLL_CFG0_BYPASS_REF_CLK) + return xtal_rate; + + if ((reg & CPLL_CFG0_SW_CFG) == 0) + return MHZ(600); + + mul = (reg >> CPLL_CFG0_PLL_MULT_RATIO_SHIFT) & + CPLL_CFG0_PLL_MULT_RATIO_MASK; + mul += 24; + if (reg & CPLL_CFG0_LC_CURFCK) + mul *= 2; + + div = (reg >> CPLL_CFG0_PLL_DIV_RATIO_SHIFT) & + CPLL_CFG0_PLL_DIV_RATIO_MASK; + + WARN_ON(div >= ARRAY_SIZE(mt7620_clk_divider)); + + return mt7620_calc_rate(xtal_rate, mul, mt7620_clk_divider[div]); +} + +static __init unsigned long +mt7620_get_pll_rate(unsigned long xtal_rate, unsigned long cpu_pll_rate) +{ + u32 reg; + + reg = rt_sysc_r32(SYSC_REG_CPLL_CONFIG1); + if (reg & CPLL_CFG1_CPU_AUX1) + return xtal_rate; + + if (reg & CPLL_CFG1_CPU_AUX0) + return MHZ(480); + return cpu_pll_rate; +} + +static __init unsigned long +mt7620_get_cpu_rate(unsigned long pll_rate) +{ + u32 reg; + u32 mul; + u32 div; + + reg = rt_sysc_r32(SYSC_REG_CPU_SYS_CLKCFG); + + mul = reg & CPU_SYS_CLKCFG_CPU_FFRAC_MASK; + div = (reg >> CPU_SYS_CLKCFG_CPU_FDIV_SHIFT) & + CPU_SYS_CLKCFG_CPU_FDIV_MASK; + + return mt7620_calc_rate(pll_rate, mul, div); +} + +static const u32 mt7620_ocp_dividers[16] __initconst = { + [CPU_SYS_CLKCFG_OCP_RATIO_2] = 2, + [CPU_SYS_CLKCFG_OCP_RATIO_3] = 3, + [CPU_SYS_CLKCFG_OCP_RATIO_4] = 4, + [CPU_SYS_CLKCFG_OCP_RATIO_5] = 5, + [CPU_SYS_CLKCFG_OCP_RATIO_10] = 10, +}; + +static __init unsigned long +mt7620_get_dram_rate(unsigned long pll_rate) +{ if (dram_type == SYSCFG0_DRAM_TYPE_SDRAM) - sys_rate = cpu_rate / 4; - else - sys_rate = 
cpu_rate / 3; + return pll_rate / 4; + + return pll_rate / 3; +} + +static __init unsigned long +mt7620_get_sys_rate(unsigned long cpu_rate) +{ + u32 reg; + u32 ocp_ratio; + u32 div; + + reg = rt_sysc_r32(SYSC_REG_CPU_SYS_CLKCFG); + + ocp_ratio = (reg >> CPU_SYS_CLKCFG_OCP_RATIO_SHIFT) & + CPU_SYS_CLKCFG_OCP_RATIO_MASK; + + if (WARN_ON(ocp_ratio >= ARRAY_SIZE(mt7620_ocp_dividers))) + return cpu_rate; + + div = mt7620_ocp_dividers[ocp_ratio]; + if (WARN(!div, "invalid divider for OCP ratio %u", ocp_ratio)) + return cpu_rate; + + return cpu_rate / div; +} + +void __init ralink_clk_init(void) +{ + unsigned long xtal_rate; + unsigned long cpu_pll_rate; + unsigned long pll_rate; + unsigned long cpu_rate; + unsigned long sys_rate; + unsigned long dram_rate; + unsigned long periph_rate; + + xtal_rate = mt7620_get_xtal_rate(); + + cpu_pll_rate = mt7620_get_cpu_pll_rate(xtal_rate); + pll_rate = mt7620_get_pll_rate(xtal_rate, cpu_pll_rate); + + cpu_rate = mt7620_get_cpu_rate(pll_rate); + dram_rate = mt7620_get_dram_rate(pll_rate); + sys_rate = mt7620_get_sys_rate(cpu_rate); + periph_rate = mt7620_get_periph_rate(xtal_rate); + +#define RFMT(label) label ":%lu.%03luMHz " +#define RINT(x) ((x) / 1000000) +#define RFRAC(x) (((x) / 1000) % 1000) + + pr_debug(RFMT("XTAL") RFMT("CPU_PLL") RFMT("PLL"), + RINT(xtal_rate), RFRAC(xtal_rate), + RINT(cpu_pll_rate), RFRAC(cpu_pll_rate), + RINT(pll_rate), RFRAC(pll_rate)); + + pr_debug(RFMT("CPU") RFMT("DRAM") RFMT("SYS") RFMT("PERIPH"), + RINT(cpu_rate), RFRAC(cpu_rate), + RINT(dram_rate), RFRAC(dram_rate), + RINT(sys_rate), RFRAC(sys_rate), + RINT(periph_rate), RFRAC(periph_rate)); + +#undef RFRAC +#undef RINT +#undef RFMT ralink_clk_add("cpu", cpu_rate); - ralink_clk_add("10000100.timer", 40000000); - ralink_clk_add("10000500.uart", 40000000); - ralink_clk_add("10000c00.uartlite", 40000000); + ralink_clk_add("10000100.timer", periph_rate); + ralink_clk_add("10000120.watchdog", periph_rate); + ralink_clk_add("10000500.uart", 
periph_rate); + ralink_clk_add("10000b00.spi", sys_rate); + ralink_clk_add("10000c00.uartlite", periph_rate); } void __init ralink_of_remap(void) @@ -214,16 +366,19 @@ void prom_soc_init(struct ralink_soc_info *soc_info) switch (dram_type) { case SYSCFG0_DRAM_TYPE_SDRAM: + pr_info("Board has SDRAM\n"); soc_info->mem_size_min = MT7620_SDRAM_SIZE_MIN; soc_info->mem_size_max = MT7620_SDRAM_SIZE_MAX; break; case SYSCFG0_DRAM_TYPE_DDR1: + pr_info("Board has DDR1\n"); soc_info->mem_size_min = MT7620_DDR1_SIZE_MIN; soc_info->mem_size_max = MT7620_DDR1_SIZE_MAX; break; case SYSCFG0_DRAM_TYPE_DDR2: + pr_info("Board has DDR2\n"); soc_info->mem_size_min = MT7620_DDR2_SIZE_MIN; soc_info->mem_size_max = MT7620_DDR2_SIZE_MAX; break; diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index f25ea5b45051..ce38d11f9da5 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -110,6 +110,9 @@ static int __init plat_of_setup(void) if (of_platform_populate(NULL, of_ids, NULL, NULL)) panic("failed to populate DT\n"); + /* make sure ithat the reset controller is setup early */ + ralink_rst_init(); + return 0; } diff --git a/arch/mips/ralink/reset.c b/arch/mips/ralink/reset.c index 22120e512e7e..55c7ec59df3c 100644 --- a/arch/mips/ralink/reset.c +++ b/arch/mips/ralink/reset.c @@ -10,6 +10,8 @@ #include <linux/pm.h> #include <linux/io.h> +#include <linux/of.h> +#include <linux/reset-controller.h> #include <asm/reboot.h> @@ -19,6 +21,66 @@ #define SYSC_REG_RESET_CTRL 0x034 #define RSTCTL_RESET_SYSTEM BIT(0) +static int ralink_assert_device(struct reset_controller_dev *rcdev, + unsigned long id) +{ + u32 val; + + if (id < 8) + return -1; + + val = rt_sysc_r32(SYSC_REG_RESET_CTRL); + val |= BIT(id); + rt_sysc_w32(val, SYSC_REG_RESET_CTRL); + + return 0; +} + +static int ralink_deassert_device(struct reset_controller_dev *rcdev, + unsigned long id) +{ + u32 val; + + if (id < 8) + return -1; + + val = rt_sysc_r32(SYSC_REG_RESET_CTRL); + val &= ~BIT(id); + rt_sysc_w32(val, 
SYSC_REG_RESET_CTRL); + + return 0; +} + +static int ralink_reset_device(struct reset_controller_dev *rcdev, + unsigned long id) +{ + ralink_assert_device(rcdev, id); + return ralink_deassert_device(rcdev, id); +} + +static struct reset_control_ops reset_ops = { + .reset = ralink_reset_device, + .assert = ralink_assert_device, + .deassert = ralink_deassert_device, +}; + +static struct reset_controller_dev reset_dev = { + .ops = &reset_ops, + .owner = THIS_MODULE, + .nr_resets = 32, + .of_reset_n_cells = 1, +}; + +void ralink_rst_init(void) +{ + reset_dev.of_node = of_find_compatible_node(NULL, NULL, + "ralink,rt2880-reset"); + if (!reset_dev.of_node) + pr_err("Failed to find reset controller node"); + else + reset_controller_register(&reset_dev); +} + static void ralink_restart(char *command) { local_irq_disable(); diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c new file mode 100644 index 000000000000..e49241a2c39a --- /dev/null +++ b/arch/mips/ralink/timer.c @@ -0,0 +1,185 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ * + * Copyright (C) 2013 John Crispin <blogic@openwrt.org> +*/ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/timer.h> +#include <linux/of_gpio.h> +#include <linux/clk.h> + +#include <asm/mach-ralink/ralink_regs.h> + +#define TIMER_REG_TMRSTAT 0x00 +#define TIMER_REG_TMR0LOAD 0x10 +#define TIMER_REG_TMR0CTL 0x18 + +#define TMRSTAT_TMR0INT BIT(0) + +#define TMR0CTL_ENABLE BIT(7) +#define TMR0CTL_MODE_PERIODIC BIT(4) +#define TMR0CTL_PRESCALER 1 +#define TMR0CTL_PRESCALE_VAL (0xf - TMR0CTL_PRESCALER) +#define TMR0CTL_PRESCALE_DIV (65536 / BIT(TMR0CTL_PRESCALER)) + +struct rt_timer { + struct device *dev; + void __iomem *membase; + int irq; + unsigned long timer_freq; + unsigned long timer_div; +}; + +static inline void rt_timer_w32(struct rt_timer *rt, u8 reg, u32 val) +{ + __raw_writel(val, rt->membase + reg); +} + +static inline u32 rt_timer_r32(struct rt_timer *rt, u8 reg) +{ + return __raw_readl(rt->membase + reg); +} + +static irqreturn_t rt_timer_irq(int irq, void *_rt) +{ + struct rt_timer *rt = (struct rt_timer *) _rt; + + rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div); + rt_timer_w32(rt, TIMER_REG_TMRSTAT, TMRSTAT_TMR0INT); + + return IRQ_HANDLED; +} + + +static int rt_timer_request(struct rt_timer *rt) +{ + int err = request_irq(rt->irq, rt_timer_irq, IRQF_DISABLED, + dev_name(rt->dev), rt); + if (err) { + dev_err(rt->dev, "failed to request irq\n"); + } else { + u32 t = TMR0CTL_MODE_PERIODIC | TMR0CTL_PRESCALE_VAL; + rt_timer_w32(rt, TIMER_REG_TMR0CTL, t); + } + return err; +} + +static void rt_timer_free(struct rt_timer *rt) +{ + free_irq(rt->irq, rt); +} + +static int rt_timer_config(struct rt_timer *rt, unsigned long divisor) +{ + if (rt->timer_freq < divisor) + rt->timer_div = rt->timer_freq; + else + rt->timer_div = divisor; + + rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div); + + return 0; +} + +static int rt_timer_enable(struct rt_timer *rt) 
+{ + u32 t; + + rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div); + + t = rt_timer_r32(rt, TIMER_REG_TMR0CTL); + t |= TMR0CTL_ENABLE; + rt_timer_w32(rt, TIMER_REG_TMR0CTL, t); + + return 0; +} + +static void rt_timer_disable(struct rt_timer *rt) +{ + u32 t; + + t = rt_timer_r32(rt, TIMER_REG_TMR0CTL); + t &= ~TMR0CTL_ENABLE; + rt_timer_w32(rt, TIMER_REG_TMR0CTL, t); +} + +static int rt_timer_probe(struct platform_device *pdev) +{ + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct rt_timer *rt; + struct clk *clk; + + rt = devm_kzalloc(&pdev->dev, sizeof(*rt), GFP_KERNEL); + if (!rt) { + dev_err(&pdev->dev, "failed to allocate memory\n"); + return -ENOMEM; + } + + rt->irq = platform_get_irq(pdev, 0); + if (!rt->irq) { + dev_err(&pdev->dev, "failed to load irq\n"); + return -ENOENT; + } + + rt->membase = devm_request_and_ioremap(&pdev->dev, res); + if (IS_ERR(rt->membase)) + return PTR_ERR(rt->membase); + + clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "failed get clock rate\n"); + return PTR_ERR(clk); + } + + rt->timer_freq = clk_get_rate(clk) / TMR0CTL_PRESCALE_DIV; + if (!rt->timer_freq) + return -EINVAL; + + rt->dev = &pdev->dev; + platform_set_drvdata(pdev, rt); + + rt_timer_request(rt); + rt_timer_config(rt, 2); + rt_timer_enable(rt); + + dev_info(&pdev->dev, "maximum frequncy is %luHz\n", rt->timer_freq); + + return 0; +} + +static int rt_timer_remove(struct platform_device *pdev) +{ + struct rt_timer *rt = platform_get_drvdata(pdev); + + rt_timer_disable(rt); + rt_timer_free(rt); + + return 0; +} + +static const struct of_device_id rt_timer_match[] = { + { .compatible = "ralink,rt2880-timer" }, + {}, +}; +MODULE_DEVICE_TABLE(of, rt_timer_match); + +static struct platform_driver rt_timer_driver = { + .probe = rt_timer_probe, + .remove = rt_timer_remove, + .driver = { + .name = "rt-timer", + .owner = THIS_MODULE, + .of_match_table = rt_timer_match + }, +}; + 
+module_platform_driver(rt_timer_driver); + +MODULE_DESCRIPTION("Ralink RT2880 timer"); +MODULE_AUTHOR("John Crispin <blogic@openwrt.org"); +MODULE_LICENSE("GPL"); diff --git a/arch/mips/sibyte/bcm1480/setup.c b/arch/mips/sibyte/bcm1480/setup.c index 05ed92c92b69..8e2e04f77870 100644 --- a/arch/mips/sibyte/bcm1480/setup.c +++ b/arch/mips/sibyte/bcm1480/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -119,7 +120,7 @@ void __init bcm1480_setup(void) uint64_t sys_rev; int plldiv; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); part_type = G_SYS_PART(sys_rev); diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index a14bd4cb0bc0..3c02b2a77ae9 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -22,6 +22,7 @@ #include <linux/string.h> #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/mipsregs.h> #include <asm/io.h> #include <asm/sibyte/sb1250.h> @@ -182,7 +183,7 @@ void __init sb1250_setup(void) int plldiv; int bad_config = 0; - sb1_pass = read_c0_prid() & 0xff; + sb1_pass = read_c0_prid() & PRID_REV_MASK; sys_rev = __raw_readq(IOADDR(A_SCD_SYSTEM_REVISION)); soc_type = SYS_SOC_TYPE(sys_rev); soc_pass = G_SYS_REVISION(sys_rev); diff --git a/arch/mips/sni/setup.c b/arch/mips/sni/setup.c index 5b09b3544edd..efad85c8c823 100644 --- a/arch/mips/sni/setup.c +++ b/arch/mips/sni/setup.c @@ -25,6 +25,7 @@ #endif #include <asm/bootinfo.h> +#include <asm/cpu.h> #include <asm/io.h> #include <asm/reboot.h> #include <asm/sni.h> @@ -173,7 +174,7 @@ void __init plat_mem_setup(void) system_type = "RM300-Cxx"; break; case SNI_BRD_PCI_DESKTOP: - switch (read_c0_prid() & 0xff00) { + switch (read_c0_prid() & PRID_IMP_MASK) { case PRID_IMP_R4600: case PRID_IMP_R4700: 
system_type = "RM200-C20"; diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 681e7f86c080..2b0b83c171e0 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -350,7 +350,7 @@ static void __init select_board(void) } /* select "default" board */ -#ifdef CONFIG_CPU_TX39XX +#ifdef CONFIG_TOSHIBA_JMR3927 txx9_board_vec = &jmr3927_vec; #endif #ifdef CONFIG_CPU_TX49XX diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 70e4f663ebd2..6aaa1607001a 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -1,7 +1,6 @@ config MN10300 def_bool y select HAVE_OPROFILE - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select HAVE_ARCH_TRACEHOOK diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 8a2e6ded9a44..3516cbdf1ee9 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, if (in_atomic() || !mm) goto no_context; + if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index d60bf98fa5cf..9488209a5253 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -11,7 +11,6 @@ config OPENRISC select HAVE_MEMBLOCK select ARCH_REQUIRE_GPIOLIB select HAVE_ARCH_TRACEHOOK - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 4a41f8493ab0..0703acf7d327 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) { /* Exception was in userspace: reenable interrupts */ local_irq_enable(); + flags |= FAULT_FLAG_USER; } else { /* If exception was in a syscall, then IRQ's may 
have * been enabled or disabled. If they were enabled, diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index aa399a5259b6..ad2ce8dab996 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -14,7 +14,6 @@ config PARISC select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select HAVE_GENERIC_HARDIRQS select BROKEN_RODATA select GENERIC_IRQ_PROBE select GENERIC_PCI_IOMAP diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f247a3480e8e..d10d27a720c0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (acc_type & VM_WRITE) + flags |= FAULT_FLAG_WRITE; retry: down_read(&mm->mmap_sem); vma = find_vma_prev(mm, address, &prev_vma); @@ -203,8 +207,7 @@ good_area: * fault. */ - fault = handle_mm_fault(mm, vma, address, - flags | ((acc_type & VM_WRITE) ? 
FAULT_FLAG_WRITE : 0)); + fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6b7530f8183c..38f3b7e47ec5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -114,7 +114,6 @@ config PPC select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 - select HAVE_GENERIC_HARDIRQS select ARCH_WANT_IPC_PARSE_VERSION select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 77e97dd0c15d..38faeded7d59 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -28,6 +28,9 @@ struct dev_archdata { void *iommu_table_base; } dma_data; +#ifdef CONFIG_IOMMU_API + void *iommu_domain; +#endif #ifdef CONFIG_SWIOTLB dma_addr_t max_direct_dma_addr; #endif diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h new file mode 100644 index 000000000000..caa1b21c25cd --- /dev/null +++ b/arch/powerpc/include/asm/fsl_pamu_stash.h @@ -0,0 +1,39 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * + */ + +#ifndef __FSL_PAMU_STASH_H +#define __FSL_PAMU_STASH_H + +/* cache stash targets */ +enum pamu_stash_target { + PAMU_ATTR_CACHE_L1 = 1, + PAMU_ATTR_CACHE_L2, + PAMU_ATTR_CACHE_L3, +}; + +/* + * This attribute allows configuring stashing specific parameters + * in the PAMU hardware. + */ + +struct pamu_stash_attribute { + u32 cpu; /* cpu number */ + u32 cache; /* cache to stash to: L1,L2,L3 */ +}; + +#endif /* __FSL_PAMU_STASH_H */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2dd69bf4af46..51ab9e7e6c39 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ - if (is_write) - flags |= FAULT_FLAG_WRITE; - #ifdef CONFIG_PPC_ICSWX /* * we need to do this early because this "data storage @@ -288,6 +285,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) store_update_sp = store_updates_sp(regs); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. 
Unfortunately, in the case of an @@ -415,6 +415,7 @@ good_area: } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; /* a read */ } else { /* protection fault */ diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index defc422a375f..8d455df58471 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -16,6 +16,11 @@ struct platform_device; + +/* FSL PCI controller BRR1 register */ +#define PCI_FSL_BRR1 0xbf8 +#define PCI_FSL_BRR1_VER 0xffff + #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3ec272859e1e..dcc6ac2d8026 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -116,7 +116,6 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_GENERIC_HARDIRQS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7de4469915f0..fc6679210d83 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access) address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 5fc237581caf..a1be70db75fe 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -2,7 +2,6 @@ menu "Machine selection" config SCORE def_bool y - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_ATOMIC64 diff --git 
a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6b18fb0189ae..52238983527d 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; + unsigned long flags = 0; siginfo_t info; int fault; @@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -95,18 +99,18 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -167,11 +171,6 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1018ed3a3ca5..224f4bc9925e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -26,7 +26,6 @@ config SUPERH select ARCH_WANT_IPC_PARSE_VERSION select HAVE_SYSCALL_TRACEPOINTS select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_GENERIC_HARDIRQS select MAY_HAVE_SPARSE_IRQ select IRQ_FORCED_THREADING select RTC_LIB diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 1f49c28affa9..541dc6101508 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, struct mm_struct *mm; struct vm_area_struct * vma; int fault; - int write = error_code & FAULT_CODE_WRITE; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? 
FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -476,6 +474,11 @@ good_area: set_thread_fault_code(error_code); + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (error_code & FAULT_CODE_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1570ad2802b3..2137ad667438 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,7 +26,6 @@ config SPARC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select USE_GENERIC_SMP_HELPERS if SMP diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index e98bfda205a2..59dbd4645725 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; int from_user = !(regs->psr & PSR_PS); int fault, code; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? 
FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (text_fault) address = regs->pc; @@ -235,6 +234,11 @@ good_area: goto bad_area; } + if (from_user) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write) struct vm_area_struct *vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned int flags = FAULT_FLAG_USER; int code; code = SEGV_MAPERR; @@ -402,11 +407,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) { + switch (handle_mm_fault(mm, vma, address, flags)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 5062ff389e83..2ebec263d685 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -315,7 +315,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) bad_kernel_pc(regs, address); return; } - } + } else + flags |= FAULT_FLAG_USER; /* * If we're in an interrupt or have no user @@ -418,13 +419,14 @@ good_area: vma->vm_file != NULL) set_thread_fault_code(fault_code | FAULT_CODE_BLKCOMMIT); + + flags |= FAULT_FLAG_WRITE; } else { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - flags |= ((fault_code & FAULT_CODE_WRITE) ? 
FAULT_FLAG_WRITE : 0); fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 932fa14de5fe..d45a2c48f185 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -11,7 +11,6 @@ config TILE select USE_GENERIC_SMP_HELPERS select CC_OPTIMIZE_FOR_SIZE select HAVE_DEBUG_KMEMLEAK - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW @@ -362,7 +361,7 @@ config CMDLINE_OVERRIDE config VMALLOC_RESERVE hex - default 0x1000000 + default 0x2000000 config HARDWALL bool "Hardwall support to allow access to user dynamic network" diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c index 4f8f3d619c4a..e19325c4c431 100644 --- a/arch/tile/gxio/iorpc_mpipe.c +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param { unsigned int flags; }; -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param { unsigned int buffer_size_enum; }; -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum) @@ -80,7 +80,7 @@ struct alloc_notif_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -102,7 +102,7 @@ struct init_notif_ring_aux_param { unsigned int ring; }; -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t 
mem_size, unsigned int mem_flags, unsigned int ring) { @@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring) @@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param { unsigned int ring; }; -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring) { struct enable_notif_ring_interrupt_param temp; @@ -179,7 +179,7 @@ struct alloc_notif_groups_param { unsigned int flags; }; -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -201,7 +201,7 @@ struct init_notif_group_param { gxio_mpipe_notif_group_bits_t bits; }; -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned int group, gxio_mpipe_notif_group_bits_t bits) { @@ -223,7 +223,7 @@ struct alloc_buckets_param { unsigned int flags; }; -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { struct alloc_buckets_param temp; @@ -244,7 +244,7 @@ struct init_bucket_param { MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info; }; -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info) { struct init_bucket_param temp; @@ -265,7 +265,7 @@ struct alloc_edma_rings_param { unsigned int flags; }; -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * 
context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -288,7 +288,7 @@ struct init_edma_ring_aux_param { unsigned int channel; }; -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel) { @@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size) { const void *params = blob; @@ -332,7 +332,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags) { @@ -355,7 +355,7 @@ struct link_open_aux_param { unsigned int flags; }; -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags) { struct link_open_aux_param temp; @@ -374,7 +374,7 @@ struct link_close_aux_param { int mac; }; -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac) +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac) { struct link_close_aux_param temp; struct link_close_aux_param *params = &temp; @@ -393,7 +393,7 @@ struct link_set_attr_aux_param { int64_t val; }; -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val) { struct link_set_attr_aux_param temp; @@ -415,8 +415,8 @@ struct get_timestamp_aux_param { uint64_t 
cycles; }; -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles) +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles) { int __result; struct get_timestamp_aux_param temp; @@ -440,7 +440,7 @@ struct set_timestamp_aux_param { uint64_t cycles; }; -int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles) { struct set_timestamp_aux_param temp; @@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param { int64_t nsec; }; -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, - int64_t nsec) +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec) { struct adjust_timestamp_aux_param temp; struct adjust_timestamp_aux_param *params = &temp; @@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux); -struct adjust_timestamp_freq_param { - int32_t ppb; -}; - -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, - int32_t ppb) -{ - struct adjust_timestamp_freq_param temp; - struct adjust_timestamp_freq_param *params = &temp; - - params->ppb = ppb; - - return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, - sizeof(*params), - GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); -} - -EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); - struct config_edma_ring_blks_param { unsigned int ering; unsigned int max_blks; @@ -501,7 +481,7 @@ struct config_edma_ring_blks_param { unsigned int db; }; -int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, +int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, unsigned int ering, unsigned int max_blks, unsigned int min_snf_blks, unsigned int db) { @@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context, 
EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks); +struct adjust_timestamp_freq_param { + int32_t ppb; +}; + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb) +{ + struct adjust_timestamp_freq_param temp; + struct adjust_timestamp_freq_param *params = &temp; + + params->ppb = ppb; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); + struct arm_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct arm_pollfd_param temp; struct arm_pollfd_param *params = &temp; @@ -541,7 +539,7 @@ struct close_pollfd_param { union iorpc_pollfd pollfd; }; -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie) +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) { struct close_pollfd_param temp; struct close_pollfd_param *params = &temp; @@ -558,7 +556,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base) +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -579,7 +577,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c index 64883aabeb9c..77019c6e9b4a 100644 --- a/arch/tile/gxio/iorpc_mpipe_info.c +++ b/arch/tile/gxio/iorpc_mpipe_info.c @@ -15,12 +15,11 @@ /* This file is machine-generated; DO NOT EDIT! 
*/ #include "gxio/iorpc_mpipe_info.h" - struct instance_aux_param { _gxio_mpipe_link_name_t name; }; -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name) { struct instance_aux_param temp; @@ -39,10 +38,10 @@ struct enumerate_aux_param { _gxio_mpipe_link_mac_t mac; }; -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac) + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac) { int __result; struct enumerate_aux_param temp; @@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, __result = hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), - (((uint64_t) idx << 32) | + (((uint64_t)idx << 32) | GXIO_MPIPE_INFO_OP_ENUMERATE_AUX)); *name = params->name; *mac = params->mac; @@ -64,7 +63,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base) { int __result; @@ -86,7 +85,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c index da6e18e049c3..1d3cedb9aeb4 100644 --- a/arch/tile/gxio/iorpc_trio.c +++ b/arch/tile/gxio/iorpc_trio.c @@ -21,7 +21,7 @@ struct alloc_asids_param { unsigned int flags; }; -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned 
int flags) { struct alloc_asids_param temp; @@ -44,7 +44,7 @@ struct alloc_memory_maps_param { unsigned int flags; }; -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -67,7 +67,7 @@ struct alloc_scatter_queues_param { unsigned int flags; }; -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -91,7 +91,7 @@ struct alloc_pio_regions_param { unsigned int flags; }; -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags) { @@ -115,7 +115,7 @@ struct init_pio_region_aux_param { unsigned int flags; }; -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags) { @@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param { unsigned int order_mode; }; -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int mac, uint64_t bus_address, @@ -175,7 +175,7 @@ struct get_port_property_param { struct pcie_trio_ports_property trio_ports; }; -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports) { int __result; @@ -198,7 +198,7 @@ struct config_legacy_intr_param { unsigned int intx; }; -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int 
inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx) { @@ -227,7 +227,7 @@ struct config_msi_intr_param { unsigned int asid; }; -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, @@ -259,7 +259,7 @@ struct set_mps_mrs_param { unsigned int mac; }; -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac) { struct set_mps_mrs_param temp; @@ -279,7 +279,7 @@ struct force_rc_link_up_param { unsigned int mac; }; -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_rc_link_up_param temp; struct force_rc_link_up_param *params = &temp; @@ -296,7 +296,7 @@ struct force_ep_link_up_param { unsigned int mac; }; -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac) +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac) { struct force_ep_link_up_param temp; struct force_ep_link_up_param *params = &temp; @@ -313,7 +313,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base) +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -334,7 +334,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c index 
cf3c3cc12204..9c820073bfc0 100644 --- a/arch/tile/gxio/iorpc_usb_host.c +++ b/arch/tile/gxio/iorpc_usb_host.c @@ -19,7 +19,7 @@ struct cfg_interrupt_param { union iorpc_interrupt interrupt; }; -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event) { struct cfg_interrupt_param temp; @@ -41,7 +41,7 @@ struct register_client_memory_param { unsigned int flags; }; -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags) { struct register_client_memory_param temp; @@ -61,7 +61,7 @@ struct get_mmio_base_param { HV_PTE base; }; -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base) +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base) { int __result; struct get_mmio_base_param temp; @@ -82,7 +82,7 @@ struct check_mmio_offset_param { unsigned long size; }; -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size) { struct check_mmio_offset_param temp; diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c index 66b002f54ecc..785afad7922e 100644 --- a/arch/tile/gxio/usb_host.c +++ b/arch/tile/gxio/usb_host.c @@ -26,7 +26,7 @@ #include <gxio/kiorpc.h> #include <gxio/usb_host.h> -int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci) { char file[32]; @@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, EXPORT_SYMBOL_GPL(gxio_usb_host_init); -int gxio_usb_host_destroy(gxio_usb_host_context_t * context) +int gxio_usb_host_destroy(gxio_usb_host_context_t *context) { 
iounmap((void __force __iomem *)(context->mmio_base)); hv_dev_close(context->fd); @@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context) EXPORT_SYMBOL_GPL(gxio_usb_host_destroy); -void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context) +void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context) { return context->mmio_base; } EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start); -size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context) +size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context) { return HV_USB_HOST_MMIO_SIZE; } diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h index 8a33912fd6cc..904538e754d8 100644 --- a/arch/tile/include/arch/mpipe.h +++ b/arch/tile/include/arch/mpipe.h @@ -176,7 +176,18 @@ typedef union */ uint_reg_t stack_idx : 5; /* Reserved. */ - uint_reg_t __reserved_2 : 5; + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; /* * Reads as one to indicate that this is a hardware managed buffer. * Ignored on writes since all buffers on a given stack are the same size. @@ -205,7 +216,8 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 5; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_1 : 6; int_reg_t va : 35; @@ -231,9 +243,9 @@ typedef union /* Reserved. 
*/ uint_reg_t __reserved_0 : 3; /* eDMA ring being accessed */ - uint_reg_t ring : 5; + uint_reg_t ring : 6; /* Reserved. */ - uint_reg_t __reserved_1 : 18; + uint_reg_t __reserved_1 : 17; /* * This field of the address selects the region (address space) to be * accessed. For the egress DMA post region, this field must be 5. @@ -250,8 +262,8 @@ typedef union uint_reg_t svc_dom : 5; uint_reg_t __reserved_2 : 6; uint_reg_t region : 3; - uint_reg_t __reserved_1 : 18; - uint_reg_t ring : 5; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; uint_reg_t __reserved_0 : 3; #endif }; diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h index 410a0400e055..84022ac5fe82 100644 --- a/arch/tile/include/arch/mpipe_constants.h +++ b/arch/tile/include/arch/mpipe_constants.h @@ -16,13 +16,13 @@ #ifndef __ARCH_MPIPE_CONSTANTS_H__ #define __ARCH_MPIPE_CONSTANTS_H__ -#define MPIPE_NUM_CLASSIFIERS 10 +#define MPIPE_NUM_CLASSIFIERS 16 #define MPIPE_CLS_MHZ 1200 -#define MPIPE_NUM_EDMA_RINGS 32 +#define MPIPE_NUM_EDMA_RINGS 64 #define MPIPE_NUM_SGMII_MACS 16 -#define MPIPE_NUM_XAUI_MACS 4 +#define MPIPE_NUM_XAUI_MACS 16 #define MPIPE_NUM_LOOPBACK_CHANNELS 4 #define MPIPE_NUM_NON_LB_CHANNELS 28 diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h index f2e9e122818d..13b3c4300e50 100644 --- a/arch/tile/include/arch/mpipe_shm.h +++ b/arch/tile/include/arch/mpipe_shm.h @@ -44,8 +44,14 @@ typedef union * descriptors toggles each time the ring tail pointer wraps. */ uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; /** Reserved. Must be zero. */ - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; /** Checksum generation enabled for this transfer. 
*/ uint_reg_t csum : 1; /** @@ -110,7 +116,8 @@ typedef union uint_reg_t notif : 1; uint_reg_t ns : 1; uint_reg_t csum : 1; - uint_reg_t r0 : 7; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; uint_reg_t gen : 1; #endif @@ -126,14 +133,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_1 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_2 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -166,8 +175,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_2 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_1 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_0 : 6; @@ -408,7 +416,10 @@ typedef union /** * Sequence number applied when packet is distributed. Classifier * selects which sequence number is to be applied by writing the 13-bit - * SQN-selector into this field. + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. 
In this case the + * PACKET_SQN will be reduced to 32 bits. */ uint_reg_t gp_sqn : 16; /** @@ -451,14 +462,16 @@ typedef union /** Reserved. */ uint_reg_t __reserved_5 : 3; /** - * Instance ID. For devices that support more than one mPIPE instance, - * this field indicates the buffer owner. If the INST field does not - * match the mPIPE's instance number when a packet is egressed, buffers - * with HWB set will be returned to the other mPIPE instance. + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. */ - uint_reg_t inst : 1; - /** Reserved. */ - uint_reg_t __reserved_6 : 1; + uint_reg_t inst : 2; /** * Always set to one by hardware in iDMA packet descriptors. 
For eDMA, * indicates whether the buffer will be released to the buffer stack @@ -491,8 +504,7 @@ typedef union uint_reg_t c : 2; uint_reg_t size : 3; uint_reg_t hwb : 1; - uint_reg_t __reserved_6 : 1; - uint_reg_t inst : 1; + uint_reg_t inst : 2; uint_reg_t __reserved_5 : 3; uint_reg_t stack_idx : 5; uint_reg_t __reserved_4 : 6; diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h index 628b045436b8..85647e91a458 100644 --- a/arch/tile/include/arch/trio_constants.h +++ b/arch/tile/include/arch/trio_constants.h @@ -16,21 +16,21 @@ #ifndef __ARCH_TRIO_CONSTANTS_H__ #define __ARCH_TRIO_CONSTANTS_H__ -#define TRIO_NUM_ASIDS 16 +#define TRIO_NUM_ASIDS 32 #define TRIO_NUM_TLBS_PER_ASID 16 #define TRIO_NUM_TPIO_REGIONS 8 #define TRIO_LOG2_NUM_TPIO_REGIONS 3 -#define TRIO_NUM_MAP_MEM_REGIONS 16 -#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4 +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 #define TRIO_NUM_MAP_SQ_REGIONS 8 #define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 #define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 -#define TRIO_NUM_PUSH_DMA_RINGS 32 +#define TRIO_NUM_PUSH_DMA_RINGS 64 -#define TRIO_NUM_PULL_DMA_RINGS 32 +#define TRIO_NUM_PULL_DMA_RINGS 64 #endif /* __ARCH_TRIO_CONSTANTS_H__ */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 6346888f7bdc..672768008618 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size) #define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) #define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ -#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */ -#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ #define _VMALLOC_START FIXADDR_TOP -#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR 
- 0x200000000) /* 4 GB */ #define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ #define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ #define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 63142ab3b3dd..d26a42279036 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -55,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index 3421177f7370..2c8a9cd102d3 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -52,12 +52,10 @@ * memory allocation code). The vmalloc code puts in an internal * guard page between each allocation. */ -#define _VMALLOC_END HUGE_VMAP_BASE +#define _VMALLOC_END MEM_SV_START #define VMALLOC_END _VMALLOC_END #define VMALLOC_START _VMALLOC_START -#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) - #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. 
*/ diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h index fdd07f88cfd7..4cda03de734f 100644 --- a/arch/tile/include/gxio/iorpc_mpipe.h +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -56,89 +56,89 @@ #define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int stack, unsigned int buffer_size_enum); -int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring); -int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int ring); -int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context, +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, unsigned int ring); -int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context, +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, unsigned 
int group, gxio_mpipe_notif_group_bits_t bits); -int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count, +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket, +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); -int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context, +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va, +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, size_t mem_size, unsigned int mem_flags, unsigned int ring, unsigned int channel); -int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob, +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, size_t blob_size); -int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context, +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, unsigned int iotlb, HV_PTE pte, unsigned int flags); -int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, _gxio_mpipe_link_name_t name, unsigned int flags); -int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac); +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); -int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac, +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, uint32_t attr, int64_t val); -int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec, - uint64_t * nsec, uint64_t * cycles); +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); 
-int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec, +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, uint64_t nsec, uint64_t cycles); -int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec); -int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context, +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb); -int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie); +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); -int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base); +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); -int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context, +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h index 476c5e5ca22c..f0b04284468b 100644 --- a/arch/tile/include/gxio/iorpc_mpipe_info.h +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -33,18 +33,18 @@ #define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, _gxio_mpipe_link_name_t name); -int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, unsigned int idx, - _gxio_mpipe_link_name_t * name, - _gxio_mpipe_link_mac_t * mac); + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); -int 
gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, HV_PTE *base); -int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context, +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index d95b96fd6c93..376a4f771167 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -46,59 +46,59 @@ #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count, +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context, +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context, +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context, +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, unsigned int count, unsigned int first, unsigned int flags); -int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context, +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, unsigned int pio_region, unsigned int mac, uint32_t bus_address_hi, unsigned int flags); -int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context, +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, unsigned int map, unsigned long va, uint64_t size, unsigned int asid, unsigned int 
mac, uint64_t bus_address, unsigned int node, unsigned int order_mode); -int gxio_trio_get_port_property(gxio_trio_context_t * context, +int gxio_trio_get_port_property(gxio_trio_context_t *context, struct pcie_trio_ports_property *trio_ports); -int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int intx); -int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x, +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event, unsigned int mac, unsigned int mem_map, uint64_t mem_map_base, uint64_t mem_map_limit, unsigned int asid); -int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps, +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, uint16_t mrs, unsigned int mac); -int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac); +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); -int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base); +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); -int gxio_trio_check_mmio_offset(gxio_trio_context_t * context, +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h index 8622e7d126ad..79962a97de8e 100644 --- a/arch/tile/include/gxio/iorpc_usb_host.h +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -31,16 +31,16 @@ #define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define 
GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) -int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x, +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, int inter_y, int inter_ipi, int inter_event); -int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context, +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, HV_PTE pte, unsigned int flags); -int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base); -int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context, +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, unsigned long offset, unsigned long size); #endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h index 5eedec0e988e..93c9636d2dd7 100644 --- a/arch/tile/include/gxio/usb_host.h +++ b/arch/tile/include/gxio/usb_host.h @@ -53,7 +53,7 @@ typedef struct { * @return Zero if the context was successfully initialized, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, int is_ehci); /* Destroy a USB context. @@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index, * @return Zero if the context was successfully destroyed, else a * GXIO_ERR_xxx error code. */ -extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context); +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); /* Retrieve the address of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The address of the shim's MMIO registers. 
*/ -extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context); +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); /* Retrieve the length of the shim's MMIO registers. * * @param context Pointer to a properly initialized gxio_usb_host_context_t. * @return The length of the shim's MMIO registers. */ -extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context); +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); #endif /* _GXIO_USB_H_ */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index ed378416b86a..49120843ff96 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, { return sys_llseek(fd, offset_high, offset_low, result, origin); } - + /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S deleted file mode 100644 index f465d1eda20f..000000000000 --- a/arch/tile/kernel/futex_64.S +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Atomically access user memory, but use MMU to avoid propagating - * kernel exceptions. 
- */ - -#include <linux/linkage.h> -#include <asm/errno.h> -#include <asm/futex.h> -#include <asm/page.h> -#include <asm/processor.h> - -/* - * Provide a set of atomic memory operations supporting <asm/futex.h>. - * - * r0: user address to manipulate - * r1: new value to write, or for cmpxchg, old value to compare against - * r2: (cmpxchg only) new value to write - * - * Return __get_user struct, r0 with value, r1 with error. - */ -#define FUTEX_OP(name, ...) \ -STD_ENTRY(futex_##name) \ - __VA_ARGS__; \ - { \ - move r1, zero; \ - jrp lr \ - }; \ - STD_ENDPROC(futex_##name); \ - .pushsection __ex_table,"a"; \ - .quad 1b, get_user_fault; \ - .popsection - - .pushsection .fixup,"ax" -get_user_fault: - { movei r1, -EFAULT; jrp lr } - ENDPROC(get_user_fault) - .popsection - -FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2) -FUTEX_OP(set, 1: exch4 r0, r0, r1) -FUTEX_OP(add, 1: fetchadd4 r0, r0, r1) -FUTEX_OP(or, 1: fetchor4 r0, r0, r1) -FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4c34caea9dd3..74c91729a62a 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1268,8 +1268,7 @@ static void __init validate_va(void) if ((long)VMALLOC_START >= 0) early_panic( "Linux VMALLOC region below the 2GB line (%#lx)!\n" - "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" - "or smaller VMALLOC_RESERVE.\n", + "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n", VMALLOC_START); #endif } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index b425fb6a480d..b030b4e78845 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -551,8 +551,8 @@ static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) /* * This function generates unalign fixup JIT. * - * We fist find unalign load/store instruction's destination, source - * reguisters: ra, rb and rd. 
and 3 scratch registers by calling + * We first find unalign load/store instruction's destination, source + * registers: ra, rb and rd. and 3 scratch registers by calling * find_regs(...). 3 scratch clobbers should not alias with any register * used in the fault bundle. Then analyze the fault bundle to determine * if it's a load or store, operand width, branch or address increment etc. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 111d5a9b76f1..6c0571216a9d 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -280,8 +278,7 @@ static int handle_page_fault(struct pt_regs *regs, if (!is_page_fault) write = 1; - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; is_kernel_mode = !user_mode(regs); @@ -365,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (!is_kernel_mode) + flags |= FAULT_FLAG_USER; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -425,12 +425,12 @@ good_area: #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!is_page_fault || !(vma->vm_flags & VM_READ)) goto bad_area; } - survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -555,11 +555,6 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } if (is_kernel_mode) goto no_context; pagefault_out_of_memory(); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e316deb92fd..0fa1acfac79a 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -828,10 +828,6 @@ void __init mem_init(void) printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); -#endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 2deaddf3e01f..4fd9ec0b58ed 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr) } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. 
*/ diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index bceee6623b00..8ddea1f8006a 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -6,7 +6,6 @@ config DEFCONFIG_LIST config UML bool default y - select HAVE_GENERIC_HARDIRQS select HAVE_UID16 select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES diff --git a/arch/um/defconfig b/arch/um/defconfig index 08107a795062..2665e6b683f5 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -129,12 +129,10 @@ CONFIG_BSD_PROCESS_ACCT=y # CONFIG_FHANDLE is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set -CONFIG_HAVE_GENERIC_HARDIRQS=y # # IRQ subsystem # -CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_SHOW=y # diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 089f3987e273..5c3aef74237f 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, pmd_t *pmd; pte_t *pte; int err = -EFAULT; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (is_write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; *code_out = SEGV_MAPERR; @@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (in_atomic()) goto out_nosemaphore; + if (is_user) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -58,12 +59,15 @@ retry: good_area: *code_out = SEGV_ACCERR; - if (is_write && !(vma->vm_flags & VM_WRITE)) - goto out; - - /* Don't require VM_READ|VM_EXEC for write faults! */ - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) - goto out; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + flags |= FAULT_FLAG_WRITE; + } else { + /* Don't require VM_READ|VM_EXEC for write faults! */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + } do { int fault; @@ -124,6 +128,8 @@ out_of_memory: * (which will retry the fault, or kill us if we got oom-killed). 
*/ up_read(&mm->mmap_sem); + if (!is_user) + goto out_nosemaphore; pagefault_out_of_memory(); return 0; } diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 41bcc0013442..82cdd8906f3d 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -2,7 +2,6 @@ config UNICORE32 def_bool y select HAVE_MEMBLOCK select HAVE_GENERIC_DMA_COHERENT - select HAVE_GENERIC_HARDIRQS select HAVE_DMA_ATTRS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index f9b5c10bccee..0dc922dba915 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (!(fsr ^ 0x12)) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -278,6 +282,13 @@ retry: (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -288,13 +299,6 @@ retry: return 0; } - /* - * If we are in kernel mode at this point, we - * have no context to handle this fault with. 
- */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 30c40f08a3d4..ee2fb9d37745 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -82,7 +82,6 @@ config X86 select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_ARCH_JUMP_LABEL - select HAVE_GENERIC_HARDIRQS select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select SPARSE_IRQ select GENERIC_FIND_FIRST_BIT @@ -482,11 +481,12 @@ config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" depends on ACPI select COMMON_CLK + select PINCTRL ---help--- Select to build support for Intel Low Power Subsystem such as found on Intel Lynxpoint PCH. Selecting this option enables - things like clock tree (common clock framework) which are needed - by the LPSS peripheral drivers. + things like clock tree (common clock framework) and pincontrol + which are needed by the LPSS peripheral drivers. config X86_RDC321X bool "RDC R-321x SoC" diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0abf6742a8b0..9db76c31b3c3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -124,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ @@ -898,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* 
must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1), EVENT_EXTRA_END }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 63438aad177f..ab3ba1c1b7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -584,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index fd8011ed4dcd..8ed44589b0e4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2808,7 +2808,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve return c; } - if (event->hw.config == ~0ULL) + if (event->attr.config == UNCORE_FIXED_EVENT) return &constraint_fixed; if (type->constraints) { @@ -3112,7 +3112,9 @@ static int uncore_pmu_event_init(struct perf_event *event) */ if (pmu->type->single_fixed && pmu->pmu_idx > 0) return -EINVAL; - hwc->config = ~0ULL; + + /* fixed counters have event field hardcoded to zero */ + hwc->config = 0ULL; } else { hwc->config = event->attr.config & pmu->type->event_mask; if (pmu->type->ops->hw_config) { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 
1b69951a81e2..b077f4cc225a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -ENTRY(save_rest) - PARTIAL_FRAME 1 (REST_SKIP+8) - movq 5*8+16(%rsp), %r11 /* save return address */ - movq_cfi rbx, RBX+16 - movq_cfi rbp, RBP+16 - movq_cfi r12, R12+16 - movq_cfi r13, R13+16 - movq_cfi r14, R14+16 - movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ - FIXUP_TOP_OF_STACK %r11, 16 - ret - CFI_ENDPROC -END(save_rest) - /* save complete stack frame */ .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aecc98a93d1b..6cacab671f9b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid) { static int current_node = -1; int node = early_cpu_to_node(cpu); + int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { @@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid) current_node = node; pr_info("Booting Node %3d, Processors ", node); } - pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); + pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); return; } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2bc1e81045b0..ddc3f3d2afdb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) return rc; } +static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) +{ + int rc; + + rc = em_ret_far(ctxt); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); + return X86EMUL_CONTINUE; +} + static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. 
*/ @@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = { G(ByteOp, group11), G(0, group11), /* 0xC8 - 0xCF */ I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), - N, I(ImplicitOps | Stack, em_ret_far), + I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm), + I(ImplicitOps | Stack, em_ret_far), D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e2d2c8f230b..dce0df8150df 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) } } -static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct kvm *kvm; int nr_to_scan = sc->nr_to_scan; - - if (nr_to_scan == 0) - goto out; + unsigned long freed = 0; raw_spin_lock(&kvm_lock); @@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) goto unlock; } - prepare_zap_oldest_mmu_page(kvm, &invalid_list); + if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); + /* + * unfair on small ones + * per-vm shrinkers cry out + * sadness comes quickly + */ list_move_tail(&kvm->vm_list, &vm_list); break; } raw_spin_unlock(&kvm_lock); + return freed; -out: +} + +static unsigned long +mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ return percpu_counter_read_positive(&kvm_total_used_mmu_pages); } static struct shrinker mmu_shrinker = { - .shrink = mmu_shrink, + .count_objects = mmu_shrink_count, + .scan_objects = mmu_shrink_scan, .seeks = DEFAULT_SEEKS * 10, }; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 043330159179..ad75d77999d0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ 
b/arch/x86/kvm/paging_tmpl.h @@ -99,6 +99,7 @@ struct guest_walker { pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; + bool pte_writable[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (pte == orig_pte) continue; + /* + * If the slot is read-only, simply do not process the accessed + * and dirty bits. This is the correct thing to do if the slot + * is ROM, and page tables in read-as-ROM/write-as-MMIO slots + * are only supported if the accessed and dirty bits are already + * set in the ROM (so that MMIO writes are never needed). + * + * Note that NPT does not allow this at all and faults, since + * it always wants nested page table entries for the guest + * page tables to be writable. And EPT works but will simply + * overwrite the read-only memory to set the accessed and dirty + * bits. + */ + if (unlikely(!walker->pte_writable[level - 1])) + continue; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); if (ret) return ret; @@ -309,7 +326,8 @@ retry_walk: goto error; real_gfn = gpa_to_gfn(real_gfn); - host_addr = gfn_to_hva(vcpu->kvm, real_gfn); + host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, + &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1f1da43ff2a2..a1216de9ffda 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return 0; } + /* + * EPT violation happened while executing iret from NMI, + * "blocked by NMI" bit has to be set before next VM entry. + * There are errata that may cause this bit to not be set: + * AAK134, BY25. 
+ */ + if (exit_qualification & INTR_INFO_UNBLOCK_NMI) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); @@ -7766,6 +7775,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_avail); + __clear_bit(VCPU_EXREG_PDPTR, + (unsigned long *)&vcpu->arch.regs_dirty); } kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 654be4ae3047..3aaeffcfd67a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, force_sig_info_fault(SIGBUS, code, address, tsk, fault); } -static noinline int +static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, unsigned int fault) { - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue pagefault. - */ - if (fatal_signal_pending(current)) { - if (!(fault & VM_FAULT_RETRY)) - up_read(&current->mm->mmap_sem); - if (!(error_code & PF_USER)) - no_context(regs, error_code, address, 0, 0); - return 1; + if (fatal_signal_pending(current) && !(error_code & PF_USER)) { + up_read(&current->mm->mmap_sem); + no_context(regs, error_code, address, 0, 0); + return; } - if (!(fault & VM_FAULT_ERROR)) - return 0; if (fault & VM_FAULT_OOM) { /* Kernel mode?
Handle exceptions or die: */ @@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, up_read(&current->mm->mmap_sem); no_context(regs, error_code, address, SIGSEGV, SEGV_MAPERR); - return 1; + return; } up_read(&current->mm->mmap_sem); @@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, else BUG(); } - return 1; } static int spurious_fault_check(unsigned long error_code, pte_t *pte) @@ -1011,9 +1002,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; struct mm_struct *mm; int fault; - int write = error_code & PF_WRITE; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; @@ -1083,6 +1072,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) if (user_mode_vm(regs)) { local_irq_enable(); error_code |= PF_USER; + flags |= FAULT_FLAG_USER; } else { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); @@ -1109,6 +1099,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } + if (error_code & PF_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in @@ -1187,9 +1180,17 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); - if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { - if (mm_fault_error(regs, error_code, address, fault)) - return; + /* + * If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because it + * would already be released in __lock_page_or_retry in mm/filemap.c.
+ */ + if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + mm_fault_error(regs, error_code, address, fault); + return; } /* diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 7ea6451a3a33..8d24dcb7cdac 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -7,7 +7,6 @@ config XTENSA select HAVE_IDE select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS - select HAVE_GENERIC_HARDIRQS select VIRT_TO_BUS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 136224b74d4f..81250ece3062 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -55,10 +55,10 @@ ifneq ($(CONFIG_LD_NO_RELAX),) LDFLAGS := --no-relax endif -ifeq ($(shell echo -e __XTENSA_EB__ | $(CC) -E - | grep -v "\#"),1) +ifeq ($(shell echo __XTENSA_EB__ | $(CC) -E - | grep -v "\#"),1) CHECKFLAGS += -D__XTENSA_EB__ endif -ifeq ($(shell echo -e __XTENSA_EL__ | $(CC) -E - | grep -v "\#"),1) +ifeq ($(shell echo __XTENSA_EL__ | $(CC) -E - | grep -v "\#"),1) CHECKFLAGS += -D__XTENSA_EL__ endif diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index 64ffc4b53df6..ca20a892021b 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -12,7 +12,7 @@ KBUILD_CFLAGS += -fno-builtin -Iarch/$(ARCH)/boot/include HOSTFLAGS += -Iarch/$(ARCH)/boot/include -BIG_ENDIAN := $(shell echo -e __XTENSA_EB__ | $(CC) -E - | grep -v "\#") +BIG_ENDIAN := $(shell echo __XTENSA_EB__ | $(CC) -E - | grep -v "\#") export ccflags-y export BIG_ENDIAN diff --git a/arch/xtensa/configs/common_defconfig b/arch/xtensa/configs/common_defconfig index a182a4e6d688..f6000fe05119 100644 --- a/arch/xtensa/configs/common_defconfig +++ b/arch/xtensa/configs/common_defconfig @@ -8,7 +8,6 @@ CONFIG_XTENSA=y # CONFIG_UID16 is not set CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_HAVE_DEC_LOCK=y -CONFIG_GENERIC_HARDIRQS=y # # Code maturity level options diff --git 
a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 77c52f80187a..4f233204faf9 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -9,7 +9,6 @@ CONFIG_XTENSA=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_HARDIRQS=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index 4799c6a526b5..d929f77a0360 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -9,7 +9,6 @@ CONFIG_XTENSA=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_HARDIRQS=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y diff --git a/arch/xtensa/include/asm/regs.h b/arch/xtensa/include/asm/regs.h index b24de6717020..4ba9f516b0e2 100644 --- a/arch/xtensa/include/asm/regs.h +++ b/arch/xtensa/include/asm/regs.h @@ -82,6 +82,7 @@ #define PS_CALLINC_SHIFT 16 #define PS_CALLINC_MASK 0x00030000 #define PS_OWB_SHIFT 8 +#define PS_OWB_WIDTH 4 #define PS_OWB_MASK 0x00000F00 #define PS_RING_SHIFT 6 #define PS_RING_MASK 0x000000C0 diff --git a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 69f901713fb6..27fa3c170662 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -35,13 +35,7 @@ # error "Bad timer number for Linux configurations!" 
#endif -#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT extern unsigned long ccount_freq; -#define CCOUNT_PER_JIFFY (ccount_freq / HZ) -#else -#define CCOUNT_PER_JIFFY (CONFIG_XTENSA_CPU_CLOCK*(1000000UL/HZ)) -#endif - typedef unsigned long long cycles_t; diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index aa2e87b8566a..d4cef6039a5c 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -146,9 +146,9 @@ * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -171,7 +171,6 @@ ENTRY(fast_unaligned) s32i a8, a2, PT_AREG8 rsr a0, depc - xsr a3, excsave1 s32i a0, a2, PT_AREG2 s32i a3, a2, PT_AREG3 diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index 647657484866..a482df5df2b2 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -32,9 +32,9 @@ * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -225,9 +225,9 @@ ENDPROC(coprocessor_restore) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -245,7 +245,6 @@ ENTRY(fast_coprocessor) /* Save 
remaining registers a1-a3 and SAR */ - xsr a3, excsave1 s32i a3, a2, PT_AREG3 rsr a3, sar s32i a1, a2, PT_AREG1 diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 9298742f0fd0..de1dfa18d0a1 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -31,8 +31,6 @@ /* Unimplemented features. */ #undef KERNEL_STACK_OVERFLOW_CHECK -#undef PREEMPTIBLE_KERNEL -#undef ALLOCA_EXCEPTION_IN_IRAM /* Not well tested. * @@ -92,9 +90,9 @@ * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original value in depc - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave1: a3 + * excsave1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -110,9 +108,8 @@ ENTRY(user_exception) - /* Save a2, a3, and depc, restore excsave_1 and set SP. */ + /* Save a1, a2, a3, and set SP. */ - xsr a3, excsave1 rsr a0, depc s32i a1, a2, PT_AREG1 s32i a0, a2, PT_AREG2 @@ -238,9 +235,9 @@ ENDPROC(user_exception) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -256,9 +253,8 @@ ENDPROC(user_exception) ENTRY(kernel_exception) - /* Save a0, a2, a3, DEPC and set SP. */ + /* Save a1, a2, a3, and set SP. */ - xsr a3, excsave1 # restore a3, excsave_1 rsr a0, depc # get a2 s32i a1, a2, PT_AREG1 s32i a0, a2, PT_AREG2 @@ -409,7 +405,7 @@ common_exception: * exception handler and call the exception handler. 
*/ - movi a4, exc_table + rsr a4, excsave1 mov a6, a1 # pass stack frame mov a7, a0 # pass EXCCAUSE addx4 a4, a0, a4 @@ -423,28 +419,15 @@ common_exception: .global common_exception_return common_exception_return: -#ifdef CONFIG_TRACE_IRQFLAGS - l32i a4, a1, PT_DEPC - /* Double exception means we came here with an exception - * while PS.EXCM was set, i.e. interrupts disabled. - */ - bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f - l32i a4, a1, PT_EXCCAUSE - bnei a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f - /* We came here with an interrupt means interrupts were enabled - * and we'll reenable them on return. - */ - movi a4, trace_hardirqs_on - callx4 a4 1: -#endif + rsil a2, LOCKLEVEL /* Jump if we are returning from kernel exceptions. */ -1: l32i a3, a1, PT_PS - _bbci.l a3, PS_UM_BIT, 4f - - rsil a2, 0 + l32i a3, a1, PT_PS + GET_THREAD_INFO(a2, a1) + l32i a4, a2, TI_FLAGS + _bbci.l a3, PS_UM_BIT, 6f /* Specific to a user exception exit: * We need to check some flags for signal handling and rescheduling, @@ -453,9 +436,6 @@ common_exception_return: * Note that we don't disable interrupts here. */ - GET_THREAD_INFO(a2,a1) - l32i a4, a2, TI_FLAGS - _bbsi.l a4, TIF_NEED_RESCHED, 3f _bbsi.l a4, TIF_NOTIFY_RESUME, 2f _bbci.l a4, TIF_SIGPENDING, 5f @@ -465,6 +445,7 @@ common_exception_return: /* Call do_signal() */ + rsil a2, 0 movi a4, do_notify_resume # int do_notify_resume(struct pt_regs*) mov a6, a1 callx4 a4 @@ -472,10 +453,24 @@ common_exception_return: 3: /* Reschedule */ + rsil a2, 0 movi a4, schedule # void schedule (void) callx4 a4 j 1b +#ifdef CONFIG_PREEMPT +6: + _bbci.l a4, TIF_NEED_RESCHED, 4f + + /* Check current_thread_info->preempt_count */ + + l32i a4, a2, TI_PRE_COUNT + bnez a4, 4f + movi a4, preempt_schedule_irq + callx4 a4 + j 1b +#endif + 5: #ifdef CONFIG_DEBUG_TLB_SANITY l32i a4, a1, PT_DEPC @@ -483,7 +478,24 @@ common_exception_return: movi a4, check_tlb_sanity callx4 a4 #endif -4: /* Restore optional registers. 
*/ +6: +4: +#ifdef CONFIG_TRACE_IRQFLAGS + l32i a4, a1, PT_DEPC + /* Double exception means we came here with an exception + * while PS.EXCM was set, i.e. interrupts disabled. + */ + bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f + l32i a4, a1, PT_EXCCAUSE + bnei a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f + /* We came here with an interrupt means interrupts were enabled + * and we'll reenable them on return. + */ + movi a4, trace_hardirqs_on + callx4 a4 +1: +#endif + /* Restore optional registers. */ load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT @@ -570,29 +582,6 @@ user_exception_exit: kernel_exception_exit: -#ifdef PREEMPTIBLE_KERNEL - -#ifdef CONFIG_PREEMPT - - /* - * Note: We've just returned from a call4, so we have - * at least 4 addt'l regs. - */ - - /* Check current_thread_info->preempt_count */ - - GET_THREAD_INFO(a2) - l32i a3, a2, TI_PREEMPT - bnez a3, 1f - - l32i a2, a2, TI_FLAGS - -1: - -#endif - -#endif - /* Check if we have to do a movsp. * * We only have to do a movsp if the previous window-frame has @@ -829,176 +818,63 @@ ENDPROC(unrecoverable_exception) * * The ALLOCA handler is entered when user code executes the MOVSP * instruction and the caller's frame is not in the register file. - * In this case, the caller frame's a0..a3 are on the stack just - * below sp (a1), and this handler moves them. * - * For "MOVSP <ar>,<as>" without destination register a1, this routine - * simply moves the value from <as> to <ar> without moving the save area. + * This algorithm was taken from the Ross Morley's RTOS Porting Layer: + * + * /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S + * + * It leverages the existing window spill/fill routines and their support for + * double exceptions. The 'movsp' instruction will only cause an exception if + * the next window needs to be loaded. In fact this ALLOCA exception may be + * replaced at some point by changing the hardware to do a underflow exception + * of the proper size instead. 
+ * + * This algorithm simply backs out the register changes started by the user + * exception handler, makes it appear that we have started a window underflow + * by rotating the window back and then setting the old window base (OWB) in + * the 'ps' register with the rolled back window base. The 'movsp' instruction + * will be re-executed and this time since the next window frame is in the + * active AR registers it won't cause an exception. + * + * If the WindowUnderflow code gets a TLB miss the page will get mapped + * and the partial WindowUnderflow will be handled in the double exception + * handler. * * Entry condition: * * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception */ -#if XCHAL_HAVE_BE -#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 4, 4 -#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 0, 4 -#else -#define _EXTUI_MOVSP_SRC(ar) extui ar, ar, 0, 4 -#define _EXTUI_MOVSP_DST(ar) extui ar, ar, 4, 4 -#endif - ENTRY(fast_alloca) + rsr a0, windowbase + rotw -1 + rsr a2, ps + extui a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH + xor a3, a3, a4 + l32i a4, a6, PT_AREG0 + l32i a1, a6, PT_DEPC + rsr a6, depc + wsr a1, depc + slli a3, a3, PS_OWB_SHIFT + xor a2, a2, a3 + wsr a2, ps + rsync - /* We shouldn't be in a double exception. */ - - l32i a0, a2, PT_DEPC - _bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double - - rsr a0, depc # get a2 - s32i a4, a2, PT_AREG4 # save a4 and - s32i a0, a2, PT_AREG2 # a2 to stack - - /* Exit critical section. */ - - movi a0, 0 - s32i a0, a3, EXC_TABLE_FIXUP - - /* Restore a3, excsave_1 */ - - xsr a3, excsave1 # make sure excsave_1 is valid for dbl.
- rsr a4, epc1 # get exception address - s32i a3, a2, PT_AREG3 # save a3 to stack - -#ifdef ALLOCA_EXCEPTION_IN_IRAM -#error iram not supported -#else - /* Note: l8ui not allowed in IRAM/IROM!! */ - l8ui a0, a4, 1 # read as(src) from MOVSP instruction -#endif - movi a3, .Lmovsp_src - _EXTUI_MOVSP_SRC(a0) # extract source register number - addx8 a3, a0, a3 - jx a3 - -.Lunhandled_double: - wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 - - .align 8 -.Lmovsp_src: - l32i a3, a2, PT_AREG0; _j 1f; .align 8 - mov a3, a1; _j 1f; .align 8 - l32i a3, a2, PT_AREG2; _j 1f; .align 8 - l32i a3, a2, PT_AREG3; _j 1f; .align 8 - l32i a3, a2, PT_AREG4; _j 1f; .align 8 - mov a3, a5; _j 1f; .align 8 - mov a3, a6; _j 1f; .align 8 - mov a3, a7; _j 1f; .align 8 - mov a3, a8; _j 1f; .align 8 - mov a3, a9; _j 1f; .align 8 - mov a3, a10; _j 1f; .align 8 - mov a3, a11; _j 1f; .align 8 - mov a3, a12; _j 1f; .align 8 - mov a3, a13; _j 1f; .align 8 - mov a3, a14; _j 1f; .align 8 - mov a3, a15; _j 1f; .align 8 - -1: - -#ifdef ALLOCA_EXCEPTION_IN_IRAM -#error iram not supported -#else - l8ui a0, a4, 0 # read ar(dst) from MOVSP instruction -#endif - addi a4, a4, 3 # step over movsp - _EXTUI_MOVSP_DST(a0) # extract destination register - wsr a4, epc1 # save new epc_1 - - _bnei a0, 1, 1f # no 'movsp a1, ax': jump - - /* Move the save area. This implies the use of the L32E - * and S32E instructions, because this move must be done with - * the user's PS.RING privilege levels, not with ring 0 - * (kernel's) privileges currently active with PS.EXCM - * set. Note that we have stil registered a fixup routine with the - * double exception vector in case a double exception occurs. - */ - - /* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */ - - l32e a0, a1, -16 - l32e a4, a1, -12 - s32e a0, a3, -16 - s32e a4, a3, -12 - l32e a0, a1, -8 - l32e a4, a1, -4 - s32e a0, a3, -8 - s32e a4, a3, -4 - - /* Restore stack-pointer and all the other saved registers. 
*/ - - mov a1, a3 - - l32i a4, a2, PT_AREG4 - l32i a3, a2, PT_AREG3 - l32i a0, a2, PT_AREG0 - l32i a2, a2, PT_AREG2 - rfe - - /* MOVSP <at>,<as> was invoked with <at> != a1. - * Because the stack pointer is not being modified, - * we should be able to just modify the pointer - * without moving any save area. - * The processor only traps these occurrences if the - * caller window isn't live, so unfortunately we can't - * use this as an alternate trap mechanism. - * So we just do the move. This requires that we - * resolve the destination register, not just the source, - * so there's some extra work. - * (PERHAPS NOT REALLY NEEDED, BUT CLEANER...) - */ - - /* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */ - -1: movi a4, .Lmovsp_dst - addx8 a4, a0, a4 - jx a4 - - .align 8 -.Lmovsp_dst: - s32i a3, a2, PT_AREG0; _j 1f; .align 8 - mov a1, a3; _j 1f; .align 8 - s32i a3, a2, PT_AREG2; _j 1f; .align 8 - s32i a3, a2, PT_AREG3; _j 1f; .align 8 - s32i a3, a2, PT_AREG4; _j 1f; .align 8 - mov a5, a3; _j 1f; .align 8 - mov a6, a3; _j 1f; .align 8 - mov a7, a3; _j 1f; .align 8 - mov a8, a3; _j 1f; .align 8 - mov a9, a3; _j 1f; .align 8 - mov a10, a3; _j 1f; .align 8 - mov a11, a3; _j 1f; .align 8 - mov a12, a3; _j 1f; .align 8 - mov a13, a3; _j 1f; .align 8 - mov a14, a3; _j 1f; .align 8 - mov a15, a3; _j 1f; .align 8 - -1: l32i a4, a2, PT_AREG4 - l32i a3, a2, PT_AREG3 - l32i a0, a2, PT_AREG0 - l32i a2, a2, PT_AREG2 - rfe - + _bbci.l a4, 31, 4f + rotw -1 + _bbci.l a8, 30, 8f + rotw -1 + j _WindowUnderflow12 +8: j _WindowUnderflow8 +4: j _WindowUnderflow4 ENDPROC(fast_alloca) /* @@ -1015,9 +891,9 @@ ENDPROC(fast_alloca) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table */ ENTRY(fast_syscall_kernel) @@ -1064,7 +940,6 @@ ENTRY(fast_syscall_unrecoverable) l32i a0, a2, PT_AREG0 # 
restore a0 xsr a2, depc # restore a2, depc - rsr a3, excsave1 wsr a0, excsave1 movi a0, unrecoverable_exception @@ -1086,10 +961,10 @@ ENDPROC(fast_syscall_unrecoverable) * a0: a2 (syscall-nr), original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in a0 and DEPC - * a3: dispatch table, original in excsave_1 + * a3: a3 * a4..a15: unchanged * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -1122,8 +997,6 @@ ENDPROC(fast_syscall_unrecoverable) ENTRY(fast_syscall_xtensa) - xsr a3, excsave1 # restore a3, excsave1 - s32i a7, a2, PT_AREG7 # we need an additional register movi a7, 4 # sizeof(unsigned int) access_ok a3, a7, a0, a2, .Leac # a0: scratch reg, a2: sp @@ -1186,9 +1059,9 @@ ENDPROC(fast_syscall_xtensa) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * Note: We assume the stack pointer is EXC_TABLE_KSTK in the fixup handler. */ @@ -1197,15 +1070,16 @@ ENTRY(fast_syscall_spill_registers) /* Register a FIXUP handler (pass current wb as a parameter) */ + xsr a3, excsave1 movi a0, fast_syscall_spill_registers_fixup s32i a0, a3, EXC_TABLE_FIXUP rsr a0, windowbase s32i a0, a3, EXC_TABLE_PARAM + xsr a3, excsave1 # restore a3 and excsave_1 - /* Save a3 and SAR on stack. */ + /* Save a3, a4 and SAR on stack. */ rsr a0, sar - xsr a3, excsave1 # restore a3 and excsave_1 s32i a3, a2, PT_AREG3 s32i a4, a2, PT_AREG4 s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5 @@ -1259,14 +1133,14 @@ fast_syscall_spill_registers_fixup: * in WS, so that the exception handlers save them to the task stack. 
*/ - rsr a3, excsave1 # get spill-mask + xsr a3, excsave1 # get spill-mask slli a2, a3, 1 # shift left by one slli a3, a2, 32-WSBITS src a2, a2, a3 # a1 = xxwww1yyxxxwww1yy...... wsr a2, windowstart # set corrected windowstart - movi a3, exc_table + rsr a3, excsave1 l32i a2, a3, EXC_TABLE_DOUBLE_SAVE # restore a2 l32i a3, a3, EXC_TABLE_PARAM # original WB (in user task) @@ -1303,7 +1177,7 @@ fast_syscall_spill_registers_fixup: /* Jump to the exception handler. */ - movi a3, exc_table + rsr a3, excsave1 rsr a0, exccause addx4 a0, a0, a3 # find entry in table l32i a0, a0, EXC_TABLE_FAST_USER # load handler @@ -1320,6 +1194,7 @@ fast_syscall_spill_registers_fixup_return: xsr a3, excsave1 movi a2, fast_syscall_spill_registers_fixup s32i a2, a3, EXC_TABLE_FIXUP + s32i a0, a3, EXC_TABLE_DOUBLE_SAVE rsr a2, windowbase s32i a2, a3, EXC_TABLE_PARAM l32i a2, a3, EXC_TABLE_KSTK @@ -1331,11 +1206,6 @@ fast_syscall_spill_registers_fixup_return: wsr a3, windowbase rsync - /* Restore a3 and return. */ - - movi a3, exc_table - xsr a3, excsave1 - rfde @@ -1522,9 +1392,8 @@ ENTRY(_spill_registers) movi a0, 0 - movi a3, exc_table + rsr a3, excsave1 l32i a1, a3, EXC_TABLE_KSTK - wsr a3, excsave1 movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL wsr a4, ps @@ -1568,9 +1437,9 @@ ENDPROC(fast_second_level_miss_double_kernel) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -1578,9 +1447,10 @@ ENDPROC(fast_second_level_miss_double_kernel) ENTRY(fast_second_level_miss) - /* Save a1. Note: we don't expect a double exception. */ + /* Save a1 and a3. Note: we don't expect a double exception. 
*/ s32i a1, a2, PT_AREG1 + s32i a3, a2, PT_AREG3 /* We need to map the page of PTEs for the user task. Find * the pointer to that page. Also, it's possible for tsk->mm @@ -1602,9 +1472,6 @@ ENTRY(fast_second_level_miss) l32i a0, a1, TASK_MM # tsk->mm beqz a0, 9f - - /* We deliberately destroy a3 that holds the exception table. */ - 8: rsr a3, excvaddr # fault address _PGD_OFFSET(a0, a3, a1) l32i a0, a0, 0 # read pmdval @@ -1655,7 +1522,7 @@ ENTRY(fast_second_level_miss) /* Exit critical section. */ -4: movi a3, exc_table # restore a3 +4: rsr a3, excsave1 movi a0, 0 s32i a0, a3, EXC_TABLE_FIXUP @@ -1663,8 +1530,8 @@ ENTRY(fast_second_level_miss) l32i a0, a2, PT_AREG0 l32i a1, a2, PT_AREG1 + l32i a3, a2, PT_AREG3 l32i a2, a2, PT_DEPC - xsr a3, excsave1 bgeui a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f @@ -1751,11 +1618,8 @@ ENTRY(fast_second_level_miss) 2: /* Invalid PGD, default exception handling */ - movi a3, exc_table rsr a1, depc - xsr a3, excsave1 s32i a1, a2, PT_AREG2 - s32i a3, a2, PT_AREG3 mov a1, a2 rsr a2, ps @@ -1775,9 +1639,9 @@ ENDPROC(fast_second_level_miss) * a0: trashed, original value saved on stack (PT_AREG0) * a1: a1 * a2: new stack pointer, original in DEPC - * a3: dispatch table + * a3: a3 * depc: a2, original value saved on stack (PT_DEPC) - * excsave_1: a3 + * excsave_1: dispatch table * * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception @@ -1785,17 +1649,17 @@ ENDPROC(fast_second_level_miss) ENTRY(fast_store_prohibited) - /* Save a1 and a4. */ + /* Save a1 and a3. */ s32i a1, a2, PT_AREG1 - s32i a4, a2, PT_AREG4 + s32i a3, a2, PT_AREG3 GET_CURRENT(a1,a2) l32i a0, a1, TASK_MM # tsk->mm beqz a0, 9f 8: rsr a1, excvaddr # fault address - _PGD_OFFSET(a0, a1, a4) + _PGD_OFFSET(a0, a1, a3) l32i a0, a0, 0 beqz a0, 2f @@ -1804,39 +1668,37 @@ ENTRY(fast_store_prohibited) * and is not PAGE_NONE. See pgtable.h for possible PTE layouts. 
*/ - _PTE_OFFSET(a0, a1, a4) - l32i a4, a0, 0 # read pteval + _PTE_OFFSET(a0, a1, a3) + l32i a3, a0, 0 # read pteval movi a1, _PAGE_CA_INVALID - ball a4, a1, 2f - bbci.l a4, _PAGE_WRITABLE_BIT, 2f + ball a3, a1, 2f + bbci.l a3, _PAGE_WRITABLE_BIT, 2f movi a1, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HW_WRITE - or a4, a4, a1 + or a3, a3, a1 rsr a1, excvaddr - s32i a4, a0, 0 + s32i a3, a0, 0 /* We need to flush the cache if we have page coloring. */ #if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK dhwb a0, 0 #endif pdtlb a0, a1 - wdtlb a4, a0 + wdtlb a3, a0 /* Exit critical section. */ movi a0, 0 + rsr a3, excsave1 s32i a0, a3, EXC_TABLE_FIXUP /* Restore the working registers, and return. */ - l32i a4, a2, PT_AREG4 + l32i a3, a2, PT_AREG3 l32i a1, a2, PT_AREG1 l32i a0, a2, PT_AREG0 l32i a2, a2, PT_DEPC - /* Restore excsave1 and a3. */ - - xsr a3, excsave1 bgeui a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f rsr a2, depc @@ -1853,11 +1715,8 @@ ENTRY(fast_store_prohibited) 2: /* If there was a problem, handle fault in C */ - rsr a4, depc # still holds a2 - xsr a3, excsave1 - s32i a4, a2, PT_AREG2 - s32i a3, a2, PT_AREG3 - l32i a4, a2, PT_AREG4 + rsr a3, depc # still holds a2 + s32i a3, a2, PT_AREG2 mov a1, a2 rsr a2, ps diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 101012bc1ff6..946fb8d06c8b 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -584,8 +584,8 @@ c_show(struct seq_file *f, void *slot) "bogomips\t: %lu.%02lu\n", XCHAL_BUILD_UNIQUE_ID, XCHAL_HAVE_BE ? 
"big" : "little", - CCOUNT_PER_JIFFY/(1000000/HZ), - (CCOUNT_PER_JIFFY/(10000/HZ)) % 100, + ccount_freq/1000000, + (ccount_freq/10000) % 100, loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 24bb0c1776ba..9af3dd88ad7e 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -29,9 +29,7 @@ #include <asm/timex.h> #include <asm/platform.h> -#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT unsigned long ccount_freq; /* ccount Hz */ -#endif static cycle_t ccount_read(struct clocksource *cs) { @@ -129,8 +127,10 @@ void __init time_init(void) platform_calibrate_ccount(); printk("%d.%02d MHz\n", (int)ccount_freq/1000000, (int)(ccount_freq/10000)%100); +#else + ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL; #endif - clocksource_register_hz(&ccount_clocksource, CCOUNT_PER_JIFFY * HZ); + clocksource_register_hz(&ccount_clocksource, ccount_freq); ccount_timer.evt.cpumask = cpumask_of(0); ccount_timer.evt.irq = irq_create_mapping(NULL, LINUX_TIMER_INT); @@ -164,7 +164,7 @@ irqreturn_t timer_interrupt (int irq, void *dev_id) #ifndef CONFIG_GENERIC_CALIBRATE_DELAY void calibrate_delay(void) { - loops_per_jiffy = CCOUNT_PER_JIFFY; + loops_per_jiffy = ccount_freq / HZ; printk("Calibrating delay loop (skipped)... 
" "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(1000000/HZ), diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index f9e175382aa9..cb8fd44caabc 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -78,6 +78,7 @@ ENTRY(_UserExceptionVector) s32i a0, a2, PT_DEPC # mark it as a regular exception addx4 a0, a0, a3 # find entry in table l32i a0, a0, EXC_TABLE_FAST_USER # load handler + xsr a3, excsave1 # restore a3 and dispatch table jx a0 ENDPROC(_UserExceptionVector) @@ -104,6 +105,7 @@ ENTRY(_KernelExceptionVector) s32i a0, a2, PT_DEPC # mark it as a regular exception addx4 a0, a0, a3 # find entry in table l32i a0, a0, EXC_TABLE_FAST_KERNEL # load handler address + xsr a3, excsave1 # restore a3 and dispatch table jx a0 ENDPROC(_KernelExceptionVector) @@ -168,7 +170,7 @@ ENDPROC(_KernelExceptionVector) * * a0: DEPC * a1: a1 - * a2: trashed, original value in EXC_TABLE_DOUBLE_A2 + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE * a3: exctable * depc: a0 * excsave_1: a3 @@ -204,47 +206,46 @@ ENDPROC(_KernelExceptionVector) .section .DoubleExceptionVector.text, "ax" .begin literal_prefix .DoubleExceptionVector + .globl _DoubleExceptionVector_WindowUnderflow + .globl _DoubleExceptionVector_WindowOverflow ENTRY(_DoubleExceptionVector) - /* Deliberately destroy excsave (don't assume it's value was valid). */ - - wsr a3, excsave1 # save a3 + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE /* Check for kernel double exception (usually fatal). */ - rsr a3, ps - _bbci.l a3, PS_UM_BIT, .Lksp + rsr a2, ps + _bbci.l a2, PS_UM_BIT, .Lksp /* Check if we are currently handling a window exception. */ /* Note: We don't need to indicate that we enter a critical section. 
*/ xsr a0, depc # get DEPC, save a0 - movi a3, WINDOW_VECTORS_VADDR - _bltu a0, a3, .Lfixup - addi a3, a3, WINDOW_VECTORS_SIZE - _bgeu a0, a3, .Lfixup + movi a2, WINDOW_VECTORS_VADDR + _bltu a0, a2, .Lfixup + addi a2, a2, WINDOW_VECTORS_SIZE + _bgeu a0, a2, .Lfixup /* Window overflow/underflow exception. Get stack pointer. */ - mov a3, a2 - /* This explicit literal and the following references to it are made - * in order to fit DoubleExceptionVector.literals into the available - * 16-byte gap before DoubleExceptionVector.text in the absence of - * link time relaxation. See kernel/vmlinux.lds.S - */ - .literal .Lexc_table, exc_table - l32r a2, .Lexc_table - l32i a2, a2, EXC_TABLE_KSTK + l32i a2, a3, EXC_TABLE_KSTK /* Check for overflow/underflow exception, jump if overflow. */ - _bbci.l a0, 6, .Lovfl - - /* a0: depc, a1: a1, a2: kstk, a3: a2, depc: a0, excsave: a3 */ + _bbci.l a0, 6, _DoubleExceptionVector_WindowOverflow - /* Restart window underflow exception. + /* + * Restart window underflow exception. + * Currently: + * depc = orig a0, + * a0 = orig DEPC, + * a2 = new sp based on KSTK from exc_table + * a3 = excsave_1 + * excsave_1 = orig a3 + * * We return to the instruction in user space that caused the window * underflow exception. Therefore, we change window base to the value * before we entered the window underflow exception and prepare the @@ -252,10 +253,11 @@ ENTRY(_DoubleExceptionVector) * by changing depc (in a0). * Note: We can trash the current window frame (a0...a3) and depc! 
*/ - +_DoubleExceptionVector_WindowUnderflow: + xsr a3, excsave1 wsr a2, depc # save stack pointer temporarily rsr a0, ps - extui a0, a0, PS_OWB_SHIFT, 4 + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH wsr a0, windowbase rsync @@ -263,28 +265,57 @@ ENTRY(_DoubleExceptionVector) xsr a2, depc # save a2 and get stack pointer s32i a0, a2, PT_AREG0 - - wsr a3, excsave1 # save a3 - l32r a3, .Lexc_table - + xsr a3, excsave1 rsr a0, exccause s32i a0, a2, PT_DEPC # mark it as a regular exception addx4 a0, a0, a3 + xsr a3, excsave1 l32i a0, a0, EXC_TABLE_FAST_USER jx a0 -.Lfixup:/* Check for a fixup handler or if we were in a critical section. */ + /* + * We only allow the ITLB miss exception if we are in kernel space. + * All other exceptions are unexpected and thus unrecoverable! + */ + +#ifdef CONFIG_MMU + .extern fast_second_level_miss_double_kernel + +.Lksp: /* a0: a0, a1: a1, a2: a2, a3: trashed, depc: depc, excsave: a3 */ + + rsr a3, exccause + beqi a3, EXCCAUSE_ITLB_MISS, 1f + addi a3, a3, -EXCCAUSE_DTLB_MISS + bnez a3, .Lunrecoverable +1: movi a3, fast_second_level_miss_double_kernel + jx a3 +#else +.equ .Lksp, .Lunrecoverable +#endif + + /* Critical! We can't handle this situation. PANIC! */ - /* a0: depc, a1: a1, a2: a2, a3: trashed, depc: a0, excsave1: a3 */ + .extern unrecoverable_exception - l32r a3, .Lexc_table - s32i a2, a3, EXC_TABLE_DOUBLE_SAVE # temporary variable +.Lunrecoverable_fixup: + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a0, depc + +.Lunrecoverable: + rsr a3, excsave1 + wsr a0, excsave1 + movi a0, unrecoverable_exception + callx0 a0 + +.Lfixup:/* Check for a fixup handler or if we were in a critical section. */ + + /* a0: depc, a1: a1, a2: trash, a3: exctable, depc: a0, excsave1: a3 */ /* Enter critical section. */ l32i a2, a3, EXC_TABLE_FIXUP s32i a3, a3, EXC_TABLE_FIXUP - beq a2, a3, .Lunrecoverable_fixup # critical! 
+ beq a2, a3, .Lunrecoverable_fixup # critical section beqz a2, .Ldflt # no handler was registered /* a0: depc, a1: a1, a2: trash, a3: exctable, depc: a0, excsave: a3 */ @@ -293,58 +324,145 @@ ENTRY(_DoubleExceptionVector) .Ldflt: /* Get stack pointer. */ - l32i a3, a3, EXC_TABLE_DOUBLE_SAVE - addi a2, a3, -PT_USER_SIZE - -.Lovfl: /* Jump to default handlers. */ + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + addi a2, a2, -PT_USER_SIZE - /* a0: depc, a1: a1, a2: kstk, a3: a2, depc: a0, excsave: a3 */ + /* a0: depc, a1: a1, a2: kstk, a3: exctable, depc: a0, excsave: a3 */ - xsr a3, depc s32i a0, a2, PT_DEPC - s32i a3, a2, PT_AREG0 + l32i a0, a3, EXC_TABLE_DOUBLE_SAVE + xsr a0, depc + s32i a0, a2, PT_AREG0 - /* a0: avail, a1: a1, a2: kstk, a3: avail, depc: a2, excsave: a3 */ + /* a0: avail, a1: a1, a2: kstk, a3: exctable, depc: a2, excsave: a3 */ - l32r a3, .Lexc_table rsr a0, exccause addx4 a0, a0, a3 + xsr a3, excsave1 l32i a0, a0, EXC_TABLE_FAST_USER jx a0 /* - * We only allow the ITLB miss exception if we are in kernel space. - * All other exceptions are unexpected and thus unrecoverable! + * Restart window OVERFLOW exception. + * Currently: + * depc = orig a0, + * a0 = orig DEPC, + * a2 = new sp based on KSTK from exc_table + * a3 = EXCSAVE_1 + * excsave_1 = orig a3 + * + * We return to the instruction in user space that caused the window + * overflow exception. Therefore, we change window base to the value + * before we entered the window overflow exception and prepare the + * registers to return as if we were coming from a regular exception + * by changing DEPC (in a0). + * + * NOTE: We CANNOT trash the current window frame (a0...a3), but we + * can clobber depc. + * + * The tricky part here is that overflow8 and overflow12 handlers + * save a0, then clobber a0. To restart the handler, we have to restore + * a0 if the double exception was past the point where a0 was clobbered. 
+ * + * To keep things simple, we take advantage of the fact all overflow + * handlers save a0 in their very first instruction. If DEPC was past + * that instruction, we can safely restore a0 from where it was saved + * on the stack. + * + * a0: depc, a1: a1, a2: kstk, a3: exc_table, depc: a0, excsave1: a3 */ +_DoubleExceptionVector_WindowOverflow: + extui a2, a0, 0, 6 # get offset into 64-byte vector handler + beqz a2, 1f # if at start of vector, don't restore -#ifdef CONFIG_MMU - .extern fast_second_level_miss_double_kernel + addi a0, a0, -128 + bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 + bbsi a0, 7, 2f -.Lksp: /* a0: a0, a1: a1, a2: a2, a3: trashed, depc: depc, excsave: a3 */ + /* + * Restore a0 as saved by _WindowOverflow8(). + * + * FIXME: we really need a fixup handler for this L32E, + * for the extremely unlikely case where the overflow handler's + * reference thru a0 gets a hardware TLB refill that bumps out + * the (distinct, aliasing) TLB entry that mapped its prior + * references thru a9, and where our reference now thru a9 + * gets a 2nd-level miss exception (not hardware TLB refill). + */ - rsr a3, exccause - beqi a3, EXCCAUSE_ITLB_MISS, 1f - addi a3, a3, -EXCCAUSE_DTLB_MISS - bnez a3, .Lunrecoverable -1: movi a3, fast_second_level_miss_double_kernel - jx a3 -#else -.equ .Lksp, .Lunrecoverable -#endif + l32e a2, a9, -16 + wsr a2, depc # replace the saved a0 + j 1f - /* Critical! We can't handle this situation. PANIC! */ +2: + /* + * Restore a0 as saved by _WindowOverflow12(). + * + * FIXME: we really need a fixup handler for this L32E, + * for the extremely unlikely case where the overflow handler's + * reference thru a0 gets a hardware TLB refill that bumps out + * the (distinct, aliasing) TLB entry that mapped its prior + * references thru a13, and where our reference now thru a13 + * gets a 2nd-level miss exception (not hardware TLB refill). 
+ */ - .extern unrecoverable_exception + l32e a2, a13, -16 + wsr a2, depc # replace the saved a0 +1: + /* + * Restore WindowBase while leaving all address registers restored. + * We have to use ROTW for this, because WSR.WINDOWBASE requires + * an address register (which would prevent restore). + * + * Window Base goes from 0 ... 7 (Modulo 8) + * Window Start is 8 bits; Ex: (0b1010 1010):0x55 from series of call4s + */ + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 -.Lunrecoverable_fixup: l32i a2, a3, EXC_TABLE_DOUBLE_SAVE - xsr a0, depc + xsr a3, excsave1 + beqi a0, 1, .L1pane + beqi a0, 3, .L3pane -.Lunrecoverable: - rsr a3, excsave1 - wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 + rsr a0, depc + rotw -2 + + /* + * We are now in the user code's original window frame. + * Process the exception as a user exception as if it was + * taken by the user code. + * + * This is similar to the user exception vector, + * except that PT_DEPC isn't set to EXCCAUSE. 
+ */ +1: + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + rsr a0, exccause + + s32i a0, a2, PT_DEPC + + addx4 a0, a0, a3 + l32i a0, a0, EXC_TABLE_FAST_USER + xsr a3, excsave1 + jx a0 + +.L1pane: + rsr a0, depc + rotw -1 + j 1b + +.L3pane: + rsr a0, depc + rotw -3 + j 1b .end literal_prefix diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d8507f812f46..74a60c7e085e 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -25,6 +25,7 @@ #include <asm/io.h> #include <asm/page.h> #include <asm/pgalloc.h> +#include <asm/ftrace.h> #ifdef CONFIG_BLK_DEV_FD #include <asm/floppy.h> #endif diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 4b7bc8db170f..70fa7bc42b4a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs) address, exccause, regs->pc, is_write? "w":"", is_exec? "x":""); #endif + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 1a5ec9a03c00..1eb09ee5311b 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -186,6 +186,7 @@ invalid: */ static int is_pmbr_valid(legacy_mbr *mbr, sector_t total_sectors) { + uint32_t sz = 0; int i, part = 0, ret = 0; /* invalid by default */ if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) @@ -216,12 +217,15 @@ check_hybrid: /* * Protective MBRs take up the lesser of the whole disk * or 2 TiB (32bit LBA), ignoring the rest of the disk. + * Some partitioning programs, nonetheless, choose to set + * the size to the maximum 32-bit limitation, disregarding + * the disk size. * * Hybrid MBRs do not necessarily comply with this. 
*/ if (ret == GPT_MBR_PROTECTIVE) { - if (le32_to_cpu(mbr->partition_record[part].size_in_lba) != - min((uint32_t) total_sectors - 1, 0xFFFFFFFF)) + sz = le32_to_cpu(mbr->partition_record[part].size_in_lba); + if (sz != (uint32_t) total_sectors - 1 && sz != 0xFFFFFFFF) ret = 0; } done: diff --git a/crypto/Makefile b/crypto/Makefile index 2d5ed08a239f..80019ba8da3a 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -83,7 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o -obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o +obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o diff --git a/crypto/api.c b/crypto/api.c index 320ea4d8a0f5..a2b39c5f3649 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -34,6 +34,8 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); +static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); + struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? 
crypto_alg_get(alg) : NULL; @@ -144,8 +146,11 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type, } up_write(&crypto_alg_sem); - if (alg != &larval->alg) + if (alg != &larval->alg) { kfree(larval); + if (crypto_is_larval(alg)) + alg = crypto_larval_wait(alg); + } return alg; } diff --git a/crypto/crct10dif.c b/crypto/crct10dif_common.c index 92aca96d6b98..b2fab366f518 100644 --- a/crypto/crct10dif.c +++ b/crypto/crct10dif_common.c @@ -24,18 +24,10 @@ * */ -#include <linux/types.h> -#include <linux/module.h> #include <linux/crc-t10dif.h> -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/string.h> +#include <linux/module.h> #include <linux/kernel.h> -struct chksum_desc_ctx { - __u16 crc; -}; - /* Table generated using the following polynomium: * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 * gt: 0x8bb7 @@ -86,93 +78,5 @@ __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) } EXPORT_SYMBOL(crc_t10dif_generic); -/* - * Steps through buffer one byte at at time, calculates reflected - * crc using table. 
- */ - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = 0; - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = crc_t10dif_generic(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__u16 *)out = ctx->crc; - return 0; -} - -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); -} - -static struct shash_alg alg = { - .digestsize = CRC_T10DIF_DIGEST_SIZE, - .init = chksum_init, - .update = chksum_update, - .final = chksum_final, - .finup = chksum_finup, - .digest = chksum_digest, - .descsize = sizeof(struct chksum_desc_ctx), - .base = { - .cra_name = "crct10dif", - .cra_driver_name = "crct10dif-generic", - .cra_priority = 100, - .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init crct10dif_mod_init(void) -{ - int ret; - - ret = crypto_register_shash(&alg); - return ret; -} - -static void __exit crct10dif_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crct10dif_mod_init); -module_exit(crct10dif_mod_fini); - -MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); -MODULE_DESCRIPTION("T10 DIF CRC calculation."); +MODULE_DESCRIPTION("T10 DIF CRC calculation common code"); 
MODULE_LICENSE("GPL"); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c new file mode 100644 index 000000000000..877e7114ec5c --- /dev/null +++ b/crypto/crct10dif_generic.c @@ -0,0 +1,127 @@ +/* + * Cryptographic API. + * + * T10 Data Integrity Field CRC16 Crypto Transform + * + * Copyright (c) 2007 Oracle Corporation. All rights reserved. + * Written by Martin K. Petersen <martin.petersen@oracle.com> + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/crc-t10dif.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/kernel.h> + +struct chksum_desc_ctx { + __u16 crc; +}; + +/* + * Steps through buffer one byte at a time, calculates reflected + * crc using table. 
+ */ + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = 0; + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc_t10dif_generic(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__u16 *)out = ctx->crc; + return 0; +} + +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(&ctx->crc, data, length, out); +} + +static struct shash_alg alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crct10dif", + .cra_driver_name = "crct10dif-generic", + .cra_priority = 100, + .cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init crct10dif_mod_init(void) +{ + int ret; + + ret = crypto_register_shash(&alg); + return ret; +} + +static void __exit crct10dif_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crct10dif_mod_init); +module_exit(crct10dif_mod_fini); + +MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); +MODULE_DESCRIPTION("T10 DIF CRC calculation."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("crct10dif"); diff --git 
a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 6a382188fa20..fb78bb9ad8f6 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -257,12 +257,13 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(&rentry->res); pdata->mmio_base = ioremap(rentry->res.start, pdata->mmio_size); - pdata->dev_desc = dev_desc; break; } acpi_dev_free_resource_list(&resource_list); + pdata->dev_desc = dev_desc; + if (dev_desc->clk_required) { ret = register_device_clock(adev, pdata); if (ret) { diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c index 2bdba6f7d762..f0b09bf9887d 100644 --- a/drivers/acpi/acpica/exstore.c +++ b/drivers/acpi/acpica/exstore.c @@ -57,6 +57,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *val_desc, union acpi_operand_object *dest_desc, struct acpi_walk_state *walk_state); +static acpi_status +acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc, + struct acpi_namespace_node *node, + struct acpi_walk_state *walk_state); + /******************************************************************************* * * FUNCTION: acpi_ex_store @@ -375,7 +380,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *source_desc, * When storing into an object the data is converted to the * target object type then stored in the object. This means * that the target object type (for an initialized target) will - * not be changed by a store operation. + * not be changed by a store operation. A copy_object can change + * the target type, however. + * + * The implicit_conversion flag is set to NO/FALSE only when + * storing to an arg_x -- as per the rules of the ACPI spec. * * Assumes parameters are already validated. 
* @@ -399,7 +408,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, target_type = acpi_ns_get_type(node); target_desc = acpi_ns_get_attached_object(node); - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p(%s) into node %p(%s)\n", + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p (%s) to node %p (%s)\n", source_desc, acpi_ut_get_object_type_name(source_desc), node, acpi_ut_get_type_name(target_type))); @@ -413,45 +422,30 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, return_ACPI_STATUS(status); } - /* If no implicit conversion, drop into the default case below */ - - if ((!implicit_conversion) || - ((walk_state->opcode == AML_COPY_OP) && - (target_type != ACPI_TYPE_LOCAL_REGION_FIELD) && - (target_type != ACPI_TYPE_LOCAL_BANK_FIELD) && - (target_type != ACPI_TYPE_LOCAL_INDEX_FIELD))) { - /* - * Force execution of default (no implicit conversion). Note: - * copy_object does not perform an implicit conversion, as per the ACPI - * spec -- except in case of region/bank/index fields -- because these - * objects must retain their original type permanently. - */ - target_type = ACPI_TYPE_ANY; - } - /* Do the actual store operation */ switch (target_type) { - case ACPI_TYPE_BUFFER_FIELD: - case ACPI_TYPE_LOCAL_REGION_FIELD: - case ACPI_TYPE_LOCAL_BANK_FIELD: - case ACPI_TYPE_LOCAL_INDEX_FIELD: - - /* For fields, copy the source data to the target field. */ - - status = acpi_ex_write_data_to_field(source_desc, target_desc, - &walk_state->result_obj); - break; - case ACPI_TYPE_INTEGER: case ACPI_TYPE_STRING: case ACPI_TYPE_BUFFER: /* - * These target types are all of type Integer/String/Buffer, and - * therefore support implicit conversion before the store. - * - * Copy and/or convert the source object to a new target object + * The simple data types all support implicit source operand + * conversion before the store. 
*/ + + if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) { + /* + * However, copy_object and Stores to arg_x do not perform + * an implicit conversion, as per the ACPI specification. + * A direct store is performed instead. + */ + status = acpi_ex_store_direct_to_node(source_desc, node, + walk_state); + break; + } + + /* Store with implicit source operand conversion support */ + status = acpi_ex_store_object_to_object(source_desc, target_desc, &new_desc, walk_state); @@ -465,13 +459,12 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, * the Name's type to that of the value being stored in it. * source_desc reference count is incremented by attach_object. * - * Note: This may change the type of the node if an explicit store - * has been performed such that the node/object type has been - * changed. + * Note: This may change the type of the node if an explicit + * store has been performed such that the node/object type + * has been changed. */ - status = - acpi_ns_attach_object(node, new_desc, - new_desc->common.type); + status = acpi_ns_attach_object(node, new_desc, + new_desc->common.type); ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Store %s into %s via Convert/Attach\n", @@ -482,38 +475,83 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc, } break; - default: - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Storing [%s] (%p) directly into node [%s] (%p)" - " with no implicit conversion\n", - acpi_ut_get_object_type_name(source_desc), - source_desc, - acpi_ut_get_object_type_name(target_desc), - node)); + case ACPI_TYPE_BUFFER_FIELD: + case ACPI_TYPE_LOCAL_REGION_FIELD: + case ACPI_TYPE_LOCAL_BANK_FIELD: + case ACPI_TYPE_LOCAL_INDEX_FIELD: + /* + * For all fields, always write the source data to the target + * field. Any required implicit source operand conversion is + * performed in the function below as necessary. Note, field + * objects must retain their original type permanently. 
+ */ + status = acpi_ex_write_data_to_field(source_desc, target_desc, + &walk_state->result_obj); + break; + default: /* * No conversions for all other types. Directly store a copy of - * the source object. NOTE: This is a departure from the ACPI - * spec, which states "If conversion is impossible, abort the - * running control method". + * the source object. This is the ACPI spec-defined behavior for + * the copy_object operator. * - * This code implements "If conversion is impossible, treat the - * Store operation as a CopyObject". + * NOTE: For the Store operator, this is a departure from the + * ACPI spec, which states "If conversion is impossible, abort + * the running control method". Instead, this code implements + * "If conversion is impossible, treat the Store operation as + * a CopyObject". */ - status = - acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, - walk_state); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - status = - acpi_ns_attach_object(node, new_desc, - new_desc->common.type); - acpi_ut_remove_reference(new_desc); + status = acpi_ex_store_direct_to_node(source_desc, node, + walk_state); break; } return_ACPI_STATUS(status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ex_store_direct_to_node + * + * PARAMETERS: source_desc - Value to be stored + * node - Named object to receive the value + * walk_state - Current walk state + * + * RETURN: Status + * + * DESCRIPTION: "Store" an object directly to a node. This involves a copy + * and an attach. 
+ * + ******************************************************************************/ + +static acpi_status +acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc, + struct acpi_namespace_node *node, + struct acpi_walk_state *walk_state) +{ + acpi_status status; + union acpi_operand_object *new_desc; + + ACPI_FUNCTION_TRACE(ex_store_direct_to_node); + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Storing [%s] (%p) directly into node [%s] (%p)" + " with no implicit conversion\n", + acpi_ut_get_object_type_name(source_desc), + source_desc, acpi_ut_get_type_name(node->type), + node)); + + /* Copy the source object to a new object */ + + status = + acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, walk_state); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Attach the new object to the node */ + + status = acpi_ns_attach_object(node, new_desc, new_desc->common.type); + acpi_ut_remove_reference(new_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 94672297e1b1..10f0f40587bb 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -79,6 +79,9 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) return ret; } +#define FIND_CHILD_MIN_SCORE 1 +#define FIND_CHILD_MAX_SCORE 2 + static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, void *not_used, void **ret_p) { @@ -92,14 +95,17 @@ static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used, return AE_OK; } -static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) +static int do_find_child_checks(acpi_handle handle, bool is_bridge) { + bool sta_present = true; unsigned long long sta; acpi_status status; - status = acpi_bus_get_status_handle(handle, &sta); - if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) - return false; + status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) + sta_present = false; + else if 
(ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) + return -ENODEV; if (is_bridge) { void *test = NULL; @@ -107,16 +113,17 @@ static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge) /* Check if this object has at least one child device. */ acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, acpi_dev_present, NULL, NULL, &test); - return !!test; + if (!test) + return -ENODEV; } - return true; + return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } struct find_child_context { u64 addr; bool is_bridge; acpi_handle ret; - bool ret_checked; + int ret_score; }; static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, @@ -125,6 +132,7 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, struct find_child_context *context = data; unsigned long long addr; acpi_status status; + int score; status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr); if (ACPI_FAILURE(status) || addr != context->addr) @@ -144,15 +152,20 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used, * its handle if so. Second, check the same for the object that we've * just found. 
*/ - if (!context->ret_checked) { - if (acpi_extra_checks_passed(context->ret, context->is_bridge)) + if (!context->ret_score) { + score = do_find_child_checks(context->ret, context->is_bridge); + if (score == FIND_CHILD_MAX_SCORE) return AE_CTRL_TERMINATE; else - context->ret_checked = true; + context->ret_score = score; } - if (acpi_extra_checks_passed(handle, context->is_bridge)) { + score = do_find_child_checks(handle, context->is_bridge); + if (score == FIND_CHILD_MAX_SCORE) { context->ret = handle; return AE_CTRL_TERMINATE; + } else if (score > context->ret_score) { + context->ret = handle; + context->ret_score = score; } return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 61d090b6ce25..fbdb82e70d10 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -204,8 +204,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) return -EINVAL; } - lock_device_hotplug(); - /* * Carry out two passes here and ignore errors in the first pass, * because if the devices in question are memory blocks and @@ -236,9 +234,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) ACPI_UINT32_MAX, acpi_bus_online_companions, NULL, NULL, NULL); - - unlock_device_hotplug(); - put_device(&device->dev); return -EBUSY; } @@ -249,8 +244,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_bus_trim(device); - unlock_device_hotplug(); - /* Device node has been unregistered. 
*/ put_device(&device->dev); device = NULL; @@ -289,6 +282,7 @@ static void acpi_bus_device_eject(void *context) u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; int error; + lock_device_hotplug(); mutex_lock(&acpi_scan_lock); acpi_bus_get_device(handle, &device); @@ -312,6 +306,7 @@ static void acpi_bus_device_eject(void *context) out: mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); return; err_out: @@ -326,8 +321,8 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; int error; - mutex_lock(&acpi_scan_lock); lock_device_hotplug(); + mutex_lock(&acpi_scan_lock); if (ost_source != ACPI_NOTIFY_BUS_CHECK) { acpi_bus_get_device(handle, &device); @@ -353,9 +348,9 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); out: - unlock_device_hotplug(); acpi_evaluate_hotplug_ost(handle, ost_source, ost_code, NULL); mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); } static void acpi_scan_bus_check(void *context) @@ -446,6 +441,7 @@ void acpi_bus_hot_remove_device(void *context) acpi_handle handle = device->handle; int error; + lock_device_hotplug(); mutex_lock(&acpi_scan_lock); error = acpi_scan_hot_remove(device); @@ -455,6 +451,7 @@ void acpi_bus_hot_remove_device(void *context) NULL); mutex_unlock(&acpi_scan_lock); + unlock_device_hotplug(); kfree(context); } EXPORT_SYMBOL(acpi_bus_hot_remove_device); diff --git a/drivers/base/node.c b/drivers/base/node.c index 7616a77ca322..bc9f43bf7e29 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -125,13 +125,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(nid, NR_WRITEBACK)), nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(nid, NR_ANON_PAGES) - + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR), -#else 
nid, K(node_page_state(nid, NR_ANON_PAGES)), -#endif nid, K(node_page_state(nid, NR_SHMEM)), nid, node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 1fca1f996b45..0ba837fc62a8 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI && GENERIC_HARDIRQS + depends on PCI help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b22a7d0fe5b7..cb1db2979d3d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -931,12 +931,14 @@ static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u64 snap_id) { u32 which; + const char *snap_name; which = rbd_dev_snap_index(rbd_dev, snap_id); if (which == BAD_SNAP_INDEX) - return NULL; + return ERR_PTR(-ENOENT); - return _rbd_dev_v1_snap_name(rbd_dev, which); + snap_name = _rbd_dev_v1_snap_name(rbd_dev, which); + return snap_name ? 
snap_name : ERR_PTR(-ENOMEM); } static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) @@ -2812,7 +2814,7 @@ out_err: obj_request_done_set(obj_request); } -static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) +static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) { struct rbd_obj_request *obj_request; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; @@ -2827,16 +2829,17 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); if (!obj_request->osd_req) goto out; - obj_request->callback = rbd_obj_request_put; osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, notify_id, 0, 0); rbd_osd_req_format_read(obj_request); ret = rbd_obj_request_submit(osdc, obj_request); -out: if (ret) - rbd_obj_request_put(obj_request); + goto out; + ret = rbd_obj_request_wait(obj_request); +out: + rbd_obj_request_put(obj_request); return ret; } @@ -2856,7 +2859,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (ret) rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); - rbd_obj_notify_ack(rbd_dev, notify_id); + rbd_obj_notify_ack_sync(rbd_dev, notify_id); } /* @@ -3328,6 +3331,31 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); } +static void rbd_dev_update_size(struct rbd_device *rbd_dev) +{ + sector_t size; + bool removing; + + /* + * Don't hold the lock while doing disk operations, + * or lock ordering will conflict with the bdev mutex via: + * rbd_add() -> blkdev_get() -> rbd_open() + */ + spin_lock_irq(&rbd_dev->lock); + removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + /* + * If the device is being removed, rbd_dev->disk has + * been destroyed, so don't try to update its size + */ + if (!removing) { + size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 
+ dout("setting size to %llu sectors", (unsigned long long)size); + set_capacity(rbd_dev->disk, size); + revalidate_disk(rbd_dev->disk); + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { u64 mapping_size; @@ -3347,12 +3375,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) up_write(&rbd_dev->header_rwsem); if (mapping_size != rbd_dev->mapping.size) { - sector_t size; - - size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; - dout("setting size to %llu sectors", (unsigned long long)size); - set_capacity(rbd_dev->disk, size); - revalidate_disk(rbd_dev->disk); + rbd_dev_update_size(rbd_dev); } return ret; @@ -4061,8 +4084,13 @@ static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) snap_id = snapc->snaps[which]; snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); - if (IS_ERR(snap_name)) - break; + if (IS_ERR(snap_name)) { + /* ignore no-longer existing snapshots */ + if (PTR_ERR(snap_name) == -ENOENT) + continue; + else + break; + } found = !strcmp(name, snap_name); kfree(snap_name); } @@ -4141,8 +4169,8 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) /* Look up the snapshot name, and make a copy */ snap_name = rbd_snap_name(rbd_dev, spec->snap_id); - if (!snap_name) { - ret = -ENOMEM; + if (IS_ERR(snap_name)) { + ret = PTR_ERR(snap_name); goto out_err; } @@ -5163,10 +5191,23 @@ static ssize_t rbd_remove(struct bus_type *bus, if (ret < 0 || already) return ret; - rbd_bus_del_dev(rbd_dev); ret = rbd_dev_header_watch_sync(rbd_dev, false); if (ret) rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); + + /* + * flush remaining watch callbacks - these must be complete + * before the osd_client is shutdown + */ + dout("%s: flushing notifies", __func__); + ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); + /* + * Don't free anything from rbd_dev->disk until after all + * notifies are completely processed. 
Otherwise + * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting + * in a potential use after free of rbd_dev->disk or rbd_dev. + */ + rbd_bus_del_dev(rbd_dev); rbd_dev_image_release(rbd_dev); module_put(THIS_MODULE); diff --git a/drivers/char/random.c b/drivers/char/random.c index 0d91fe52f3f5..7737b5bd26af 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -255,10 +255,7 @@ #include <linux/fips.h> #include <linux/ptrace.h> #include <linux/kmemcheck.h> - -#ifdef CONFIG_GENERIC_HARDIRQS -# include <linux/irq.h> -#endif +#include <linux/irq.h> #include <asm/processor.h> #include <asm/uaccess.h> diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index 4329a29a5310..b9c81b7c3a3b 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -315,68 +315,47 @@ static int em_sti_probe(struct platform_device *pdev) { struct em_sti_priv *p; struct resource *res; - int irq, ret; + int irq; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); if (p == NULL) { dev_err(&pdev->dev, "failed to allocate driver data\n"); - ret = -ENOMEM; - goto err0; + return -ENOMEM; } p->pdev = pdev; platform_set_drvdata(pdev, p); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "failed to get I/O memory\n"); - ret = -EINVAL; - goto err0; - } - irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "failed to get irq\n"); - ret = -EINVAL; - goto err0; + return -EINVAL; } /* map memory, let base point to the STI instance */ - p->base = ioremap_nocache(res->start, resource_size(res)); - if (p->base == NULL) { - dev_err(&pdev->dev, "failed to remap I/O memory\n"); - ret = -ENXIO; - goto err0; - } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + p->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(p->base)) + return PTR_ERR(p->base); /* get hold of clock */ - p->clk = clk_get(&pdev->dev, "sclk"); + p->clk = 
devm_clk_get(&pdev->dev, "sclk"); if (IS_ERR(p->clk)) { dev_err(&pdev->dev, "cannot get clock\n"); - ret = PTR_ERR(p->clk); - goto err1; + return PTR_ERR(p->clk); } - if (request_irq(irq, em_sti_interrupt, - IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, - dev_name(&pdev->dev), p)) { + if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p)) { dev_err(&pdev->dev, "failed to request low IRQ\n"); - ret = -ENOENT; - goto err2; + return -ENOENT; } raw_spin_lock_init(&p->lock); em_sti_register_clockevent(p); em_sti_register_clocksource(p); return 0; - -err2: - clk_put(p->clk); -err1: - iounmap(p->base); -err0: - kfree(p); - return ret; } static int em_sti_remove(struct platform_device *pdev) diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index 7d2c2c56f73c..1b74bea12385 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -165,7 +165,8 @@ static void nmdk_clkevt_resume(struct clock_event_device *cedev) static struct clock_event_device nmdk_clkevt = { .name = "mtu_1", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_DYNIRQ, .rating = 200, .set_mode = nmdk_clkevt_mode, .set_next_event = nmdk_clkevt_next, diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 08d0c418c94a..0965e9848b3d 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -37,6 +37,7 @@ struct sh_cmt_priv { void __iomem *mapbase; + void __iomem *mapbase_str; struct clk *clk; unsigned long width; /* 16 or 32 bit version of hardware block */ unsigned long overflow_bit; @@ -79,6 +80,12 @@ struct sh_cmt_priv { * CMCSR 0xffca0060 16-bit * CMCNT 0xffca0064 32-bit * CMCOR 0xffca0068 32-bit + * + * "32-bit counter and 32-bit control" as found on r8a73a4 and r8a7790: + * CMSTR 0xffca0500 32-bit + * CMCSR 0xffca0510 32-bit + * 
CMCNT 0xffca0514 32-bit + * CMCOR 0xffca0518 32-bit */ static unsigned long sh_cmt_read16(void __iomem *base, unsigned long offs) @@ -109,9 +116,7 @@ static void sh_cmt_write32(void __iomem *base, unsigned long offs, static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_priv *p) { - struct sh_timer_config *cfg = p->pdev->dev.platform_data; - - return p->read_control(p->mapbase - cfg->channel_offset, 0); + return p->read_control(p->mapbase_str, 0); } static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_priv *p) @@ -127,9 +132,7 @@ static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_priv *p) static inline void sh_cmt_write_cmstr(struct sh_cmt_priv *p, unsigned long value) { - struct sh_timer_config *cfg = p->pdev->dev.platform_data; - - p->write_control(p->mapbase - cfg->channel_offset, 0, value); + p->write_control(p->mapbase_str, 0, value); } static inline void sh_cmt_write_cmcsr(struct sh_cmt_priv *p, @@ -676,7 +679,7 @@ static int sh_cmt_register(struct sh_cmt_priv *p, char *name, static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) { struct sh_timer_config *cfg = pdev->dev.platform_data; - struct resource *res; + struct resource *res, *res2; int irq, ret; ret = -ENXIO; @@ -694,6 +697,9 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) goto err0; } + /* optional resource for the shared timer start/stop register */ + res2 = platform_get_resource(p->pdev, IORESOURCE_MEM, 1); + irq = platform_get_irq(p->pdev, 0); if (irq < 0) { dev_err(&p->pdev->dev, "failed to get irq\n"); @@ -707,6 +713,15 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) goto err0; } + /* map second resource for CMSTR */ + p->mapbase_str = ioremap_nocache(res2 ? res2->start : + res->start - cfg->channel_offset, + res2 ? 
resource_size(res2) : 2); + if (p->mapbase_str == NULL) { + dev_err(&p->pdev->dev, "failed to remap I/O second memory\n"); + goto err1; + } + /* request irq using setup_irq() (too early for request_irq()) */ p->irqaction.name = dev_name(&p->pdev->dev); p->irqaction.handler = sh_cmt_interrupt; @@ -719,11 +734,17 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) if (IS_ERR(p->clk)) { dev_err(&p->pdev->dev, "cannot get clock\n"); ret = PTR_ERR(p->clk); - goto err1; + goto err2; } - p->read_control = sh_cmt_read16; - p->write_control = sh_cmt_write16; + if (res2 && (resource_size(res2) == 4)) { + /* assume both CMSTR and CMCSR to be 32-bit */ + p->read_control = sh_cmt_read32; + p->write_control = sh_cmt_write32; + } else { + p->read_control = sh_cmt_read16; + p->write_control = sh_cmt_write16; + } if (resource_size(res) == 6) { p->width = 16; @@ -752,22 +773,23 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) cfg->clocksource_rating); if (ret) { dev_err(&p->pdev->dev, "registration failed\n"); - goto err2; + goto err3; } p->cs_enabled = false; ret = setup_irq(irq, &p->irqaction); if (ret) { dev_err(&p->pdev->dev, "failed to request irq %d\n", irq); - goto err2; + goto err3; } platform_set_drvdata(pdev, p); return 0; -err2: +err3: clk_put(p->clk); - +err2: + iounmap(p->mapbase_str); err1: iounmap(p->mapbase); err0: diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 847cab6f6e31..0198504ef6b0 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -13,6 +13,19 @@ * * Timer 0 is used as free-running clocksource, while timer 1 is * used as clock_event_device. + * + * --- + * Clocksource driver for Armada 370 and Armada XP SoC. + * This driver implements one compatible string for each SoC, given + * each has its own characteristics: + * + * * Armada 370 has no 25 MHz fixed timer. 
+ * + * * Armada XP cannot work properly without such 25 MHz fixed timer as + * doing otherwise leads to using a clocksource whose frequency varies + * when doing cpufreq frequency changes. + * + * See Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt */ #include <linux/init.h> @@ -30,19 +43,18 @@ #include <linux/module.h> #include <linux/sched_clock.h> #include <linux/percpu.h> -#include <linux/time-armada-370-xp.h> /* * Timer block registers. */ #define TIMER_CTRL_OFF 0x0000 -#define TIMER0_EN 0x0001 -#define TIMER0_RELOAD_EN 0x0002 -#define TIMER0_25MHZ 0x0800 +#define TIMER0_EN BIT(0) +#define TIMER0_RELOAD_EN BIT(1) +#define TIMER0_25MHZ BIT(11) #define TIMER0_DIV(div) ((div) << 19) -#define TIMER1_EN 0x0004 -#define TIMER1_RELOAD_EN 0x0008 -#define TIMER1_25MHZ 0x1000 +#define TIMER1_EN BIT(2) +#define TIMER1_RELOAD_EN BIT(3) +#define TIMER1_25MHZ BIT(12) #define TIMER1_DIV(div) ((div) << 22) #define TIMER_EVENTS_STATUS 0x0004 #define TIMER0_CLR_MASK (~0x1) @@ -72,6 +84,18 @@ static u32 ticks_per_jiffy; static struct clock_event_device __percpu *armada_370_xp_evt; +static void timer_ctrl_clrset(u32 clr, u32 set) +{ + writel((readl(timer_base + TIMER_CTRL_OFF) & ~clr) | set, + timer_base + TIMER_CTRL_OFF); +} + +static void local_timer_ctrl_clrset(u32 clr, u32 set) +{ + writel((readl(local_base + TIMER_CTRL_OFF) & ~clr) | set, + local_base + TIMER_CTRL_OFF); +} + static u32 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); @@ -84,7 +108,6 @@ static int armada_370_xp_clkevt_next_event(unsigned long delta, struct clock_event_device *dev) { - u32 u; /* * Clear clockevent timer interrupt. */ @@ -98,11 +121,8 @@ armada_370_xp_clkevt_next_event(unsigned long delta, /* * Enable the timer. 
*/ - u = readl(local_base + TIMER_CTRL_OFF); - u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN | - TIMER0_DIV(TIMER_DIVIDER_SHIFT)); - writel(u, local_base + TIMER_CTRL_OFF); - + local_timer_ctrl_clrset(TIMER0_RELOAD_EN, + TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT)); return 0; } @@ -110,8 +130,6 @@ static void armada_370_xp_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) { - u32 u; - if (mode == CLOCK_EVT_MODE_PERIODIC) { /* @@ -123,18 +141,14 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, /* * Enable timer. */ - - u = readl(local_base + TIMER_CTRL_OFF); - - writel((u | TIMER0_EN | TIMER0_RELOAD_EN | - TIMER0_DIV(TIMER_DIVIDER_SHIFT)), - local_base + TIMER_CTRL_OFF); + local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN | + TIMER0_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); } else { /* * Disable timer. */ - u = readl(local_base + TIMER_CTRL_OFF); - writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF); + local_timer_ctrl_clrset(TIMER0_EN, 0); /* * ACK pending timer interrupt. 
@@ -163,14 +177,14 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id) */ static int armada_370_xp_timer_setup(struct clock_event_device *evt) { - u32 u; + u32 clr = 0, set = 0; int cpu = smp_processor_id(); - u = readl(local_base + TIMER_CTRL_OFF); if (timer25Mhz) - writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + set = TIMER0_25MHZ; else - writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + clr = TIMER0_25MHZ; + local_timer_ctrl_clrset(clr, set); evt->name = "armada_370_xp_per_cpu_tick", evt->features = CLOCK_EVT_FEAT_ONESHOT | @@ -217,36 +231,21 @@ static struct notifier_block armada_370_xp_timer_cpu_nb = { .notifier_call = armada_370_xp_timer_cpu_notify, }; -void __init armada_370_xp_timer_init(void) +static void __init armada_370_xp_timer_common_init(struct device_node *np) { - u32 u; - struct device_node *np; + u32 clr = 0, set = 0; int res; - np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer"); timer_base = of_iomap(np, 0); WARN_ON(!timer_base); local_base = of_iomap(np, 1); - if (of_find_property(np, "marvell,timer-25Mhz", NULL)) { - /* The fixed 25MHz timer is available so let's use it */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u | TIMER0_25MHZ, - timer_base + TIMER_CTRL_OFF); - timer_clk = 25000000; - } else { - unsigned long rate = 0; - struct clk *clk = of_clk_get(np, 0); - WARN_ON(IS_ERR(clk)); - rate = clk_get_rate(clk); - - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~(TIMER0_25MHZ), - timer_base + TIMER_CTRL_OFF); - - timer_clk = rate / TIMER_DIVIDER; - timer25Mhz = false; - } + if (timer25Mhz) + set = TIMER0_25MHZ; + else + clr = TIMER0_25MHZ; + timer_ctrl_clrset(clr, set); + local_timer_ctrl_clrset(clr, set); /* * We use timer 0 as clocksource, and private(local) timer 0 @@ -268,10 +267,8 @@ void __init armada_370_xp_timer_init(void) writel(0xffffffff, timer_base + TIMER0_VAL_OFF); writel(0xffffffff, timer_base + TIMER0_RELOAD_OFF); - u = readl(timer_base + 
TIMER_CTRL_OFF); - - writel((u | TIMER0_EN | TIMER0_RELOAD_EN | - TIMER0_DIV(TIMER_DIVIDER_SHIFT)), timer_base + TIMER_CTRL_OFF); + timer_ctrl_clrset(0, TIMER0_EN | TIMER0_RELOAD_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); clocksource_mmio_init(timer_base + TIMER0_VAL_OFF, "armada_370_xp_clocksource", @@ -293,3 +290,29 @@ void __init armada_370_xp_timer_init(void) if (!res) armada_370_xp_timer_setup(this_cpu_ptr(armada_370_xp_evt)); } + +static void __init armada_xp_timer_init(struct device_node *np) +{ + struct clk *clk = of_clk_get_by_name(np, "fixed"); + + /* The 25Mhz fixed clock is mandatory, and must always be available */ + BUG_ON(IS_ERR(clk)); + timer_clk = clk_get_rate(clk); + + armada_370_xp_timer_common_init(np); +} +CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer", + armada_xp_timer_init); + +static void __init armada_370_timer_init(struct device_node *np) +{ + struct clk *clk = of_clk_get(np, 0); + + BUG_ON(IS_ERR(clk)); + timer_clk = clk_get_rate(clk) / TIMER_DIVIDER; + timer25Mhz = false; + + armada_370_xp_timer_common_init(np); +} +CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer", + armada_370_timer_init); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5c75e3147a60..43c24aa756f6 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -280,13 +280,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, switch (state) { case CPUFREQ_PRECHANGE: - if (WARN(policy->transition_ongoing == - cpumask_weight(policy->cpus), - "In middle of another frequency transition\n")) - return; - - policy->transition_ongoing++; - /* detect if the driver reported a value as "old frequency" * which is not equal to what the cpufreq core thinks is * "old frequency". 
@@ -306,12 +299,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, break; case CPUFREQ_POSTCHANGE: - if (WARN(!policy->transition_ongoing, - "No frequency transition in progress\n")) - return; - - policy->transition_ongoing--; - adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); @@ -437,7 +424,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *policy, static ssize_t store_##file_name \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - unsigned int ret; \ + int ret; \ struct cpufreq_policy new_policy; \ \ ret = cpufreq_get_policy(&new_policy, policy->cpu); \ @@ -490,7 +477,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) static ssize_t store_scaling_governor(struct cpufreq_policy *policy, const char *buf, size_t count) { - unsigned int ret; + int ret; char str_governor[16]; struct cpufreq_policy new_policy; @@ -694,8 +681,13 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + get_online_cpus(); + + if (!cpu_online(policy->cpu)) + goto unlock; + if (!down_read_trylock(&cpufreq_rwsem)) - goto exit; + goto unlock; if (lock_policy_rwsem_write(policy->cpu) < 0) goto up_read; @@ -709,7 +701,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, up_read: up_read(&cpufreq_rwsem); -exit: +unlock: + put_online_cpus(); + return ret; } @@ -912,11 +906,11 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) struct cpufreq_policy *policy; unsigned long flags; - write_lock_irqsave(&cpufreq_driver_lock, flags); + read_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data_fallback, cpu); - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); return policy; } @@ -953,6 +947,21 @@ static void cpufreq_policy_free(struct 
cpufreq_policy *policy) kfree(policy); } +static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +{ + if (cpu == policy->cpu) + return; + + policy->last_cpu = policy->cpu; + policy->cpu = cpu; + +#ifdef CONFIG_CPU_FREQ_TABLE + cpufreq_frequency_table_update_policy_cpu(policy); +#endif + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_UPDATE_POLICY_CPU, policy); +} + static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, bool frozen) { @@ -1006,7 +1015,18 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, if (!policy) goto nomem_out; - policy->cpu = cpu; + + /* + * In the resume path, since we restore a saved policy, the assignment + * to policy->cpu is like an update of the existing policy, rather than + * the creation of a brand new one. So we need to perform this update + * by invoking update_policy_cpu(). + */ + if (frozen && cpu != policy->cpu) + update_policy_cpu(policy, cpu); + else + policy->cpu = cpu; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1098,18 +1118,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return __cpufreq_add_dev(dev, sif, false); } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) -{ - policy->last_cpu = policy->cpu; - policy->cpu = cpu; - -#ifdef CONFIG_CPU_FREQ_TABLE - cpufreq_frequency_table_update_policy_cpu(policy); -#endif - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_UPDATE_POLICY_CPU, policy); -} - static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, unsigned int old_cpu, bool frozen) { @@ -1141,22 +1149,14 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, return cpu_dev->id; } -/** - * __cpufreq_remove_dev - remove a CPU device - * - * Removes the cpufreq interface for a CPU device. 
- * Caller should already have policy_rwsem in write mode for this CPU. - * This routine frees the rwsem before returning. - */ -static int __cpufreq_remove_dev(struct device *dev, - struct subsys_interface *sif, bool frozen) +static int __cpufreq_remove_dev_prepare(struct device *dev, + struct subsys_interface *sif, + bool frozen) { unsigned int cpu = dev->id, cpus; int new_cpu, ret; unsigned long flags; struct cpufreq_policy *policy; - struct kobject *kobj; - struct completion *cmp; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1196,8 +1196,9 @@ static int __cpufreq_remove_dev(struct device *dev, cpumask_clear_cpu(cpu, policy->cpus); unlock_policy_rwsem_write(cpu); - if (cpu != policy->cpu && !frozen) { - sysfs_remove_link(&dev->kobj, "cpufreq"); + if (cpu != policy->cpu) { + if (!frozen) + sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen); @@ -1213,6 +1214,33 @@ static int __cpufreq_remove_dev(struct device *dev, } } + return 0; +} + +static int __cpufreq_remove_dev_finish(struct device *dev, + struct subsys_interface *sif, + bool frozen) +{ + unsigned int cpu = dev->id, cpus; + int ret; + unsigned long flags; + struct cpufreq_policy *policy; + struct kobject *kobj; + struct completion *cmp; + + read_lock_irqsave(&cpufreq_driver_lock, flags); + policy = per_cpu(cpufreq_cpu_data, cpu); + read_unlock_irqrestore(&cpufreq_driver_lock, flags); + + if (!policy) { + pr_debug("%s: No cpu_data found\n", __func__); + return -EINVAL; + } + + lock_policy_rwsem_read(cpu); + cpus = cpumask_weight(policy->cpus); + unlock_policy_rwsem_read(cpu); + /* If cpu is last user of policy, free policy */ if (cpus == 1) { if (cpufreq_driver->target) { @@ -1272,6 +1300,27 @@ static int __cpufreq_remove_dev(struct device *dev, return 0; } +/** + * __cpufreq_remove_dev - remove a CPU device + * + * Removes the cpufreq interface for a CPU device. 
+ * Caller should already have policy_rwsem in write mode for this CPU. + * This routine frees the rwsem before returning. + */ +static inline int __cpufreq_remove_dev(struct device *dev, + struct subsys_interface *sif, + bool frozen) +{ + int ret; + + ret = __cpufreq_remove_dev_prepare(dev, sif, frozen); + + if (!ret) + ret = __cpufreq_remove_dev_finish(dev, sif, frozen); + + return ret; +} + static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; @@ -1610,8 +1659,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - if (policy->transition_ongoing) - return -EBUSY; /* Make sure that target_freq is within supported range */ if (target_freq > policy->max) @@ -1692,8 +1739,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, policy->cpu, event); mutex_lock(&cpufreq_governor_lock); - if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) || - (policy->governor_enabled && (event == CPUFREQ_GOV_START))) { + if ((policy->governor_enabled && event == CPUFREQ_GOV_START) + || (!policy->governor_enabled + && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) { mutex_unlock(&cpufreq_governor_lock); return -EBUSY; } @@ -1994,7 +2042,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev(dev, NULL, frozen); + __cpufreq_remove_dev_prepare(dev, NULL, frozen); + break; + + case CPU_POST_DEAD: + __cpufreq_remove_dev_finish(dev, NULL, frozen); break; case CPU_DOWN_FAILED: diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 04452f026ed0..4cf0d2805cb2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -74,7 +74,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) for (i = 0; i < stat->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], (unsigned long long) - 
cputime64_to_clock_t(stat->time_in_state[i])); + jiffies_64_to_clock_t(stat->time_in_state[i])); } return len; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6efd96c196b2..9733f29ed148 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -522,6 +522,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x2a, default_policy), ICPU(0x2d, default_policy), ICPU(0x3a, default_policy), + ICPU(0x3c, default_policy), + ICPU(0x3e, default_policy), + ICPU(0x3f, default_policy), + ICPU(0x45, default_policy), + ICPU(0x46, default_policy), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 3ac499d5a207..6e11701f0fca 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -331,7 +331,8 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_get_driver(); - drv->refcnt++; + if (drv) + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index dde13248b681..dcfe964cc8dc 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -4,7 +4,6 @@ config DW_DMAC_CORE tristate "Synopsys DesignWare AHB DMA support" - depends on GENERIC_HARDIRQS select DMA_ENGINE config DW_DMAC diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 349b16160ac9..b6ed304863eb 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -203,6 +203,14 @@ config GPIO_MXS select GPIO_GENERIC select GENERIC_IRQ_CHIP +config GPIO_OCTEON + tristate "Cavium OCTEON GPIO" + depends on GPIOLIB && CAVIUM_OCTEON_SOC + default y + help + Say yes here to support the on-chip GPIO lines on the OCTEON + family of SOCs. 
+ config GPIO_PL061 bool "PrimeCell PL061 GPIO support" depends on ARM && ARM_AMBA @@ -314,7 +322,7 @@ config GPIO_ICH config GPIO_VX855 tristate "VIA VX855/VX875 GPIO" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE select MFD_VX855 help @@ -388,7 +396,7 @@ config GPIO_MAX732X config GPIO_MAX732X_IRQ bool "Interrupt controller support for MAX732x" - depends on GPIO_MAX732X=y && GENERIC_HARDIRQS + depends on GPIO_MAX732X=y help Say yes here to enable the max732x to be used as an interrupt controller. It requires the driver to be built in the kernel. @@ -653,7 +661,7 @@ config GPIO_TIMBERDALE config GPIO_RDC321X tristate "RDC R-321x GPIO support" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE select MFD_RDC321X help diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 97438bf8434a..98e23ebba2cf 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_GPIO_MSM_V2) += gpio-msm-v2.o obj-$(CONFIG_GPIO_MVEBU) += gpio-mvebu.o obj-$(CONFIG_GPIO_MXC) += gpio-mxc.o obj-$(CONFIG_GPIO_MXS) += gpio-mxs.o +obj-$(CONFIG_GPIO_OCTEON) += gpio-octeon.o obj-$(CONFIG_ARCH_OMAP) += gpio-omap.o obj-$(CONFIG_GPIO_PCA953X) += gpio-pca953x.o obj-$(CONFIG_GPIO_PCF857X) += gpio-pcf857x.o diff --git a/drivers/gpio/gpio-octeon.c b/drivers/gpio/gpio-octeon.c new file mode 100644 index 000000000000..71a4a318315d --- /dev/null +++ b/drivers/gpio/gpio-octeon.c @@ -0,0 +1,157 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011, 2012 Cavium Inc. 
+ */ + +#include <linux/platform_device.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gpio.h> +#include <linux/io.h> + +#include <asm/octeon/octeon.h> +#include <asm/octeon/cvmx-gpio-defs.h> + +#define RX_DAT 0x80 +#define TX_SET 0x88 +#define TX_CLEAR 0x90 +/* + * The address offset of the GPIO configuration register for a given + * line. + */ +static unsigned int bit_cfg_reg(unsigned int offset) +{ + /* + * The register stride is 8, with a discontinuity after the + * first 16. + */ + if (offset < 16) + return 8 * offset; + else + return 8 * (offset - 16) + 0x100; +} + +struct octeon_gpio { + struct gpio_chip chip; + u64 register_base; +}; + +static int octeon_gpio_dir_in(struct gpio_chip *chip, unsigned offset) +{ + struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip); + + cvmx_write_csr(gpio->register_base + bit_cfg_reg(offset), 0); + return 0; +} + +static void octeon_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip); + u64 mask = 1ull << offset; + u64 reg = gpio->register_base + (value ? 
TX_SET : TX_CLEAR); + cvmx_write_csr(reg, mask); +} + +static int octeon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, + int value) +{ + struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip); + union cvmx_gpio_bit_cfgx cfgx; + + octeon_gpio_set(chip, offset, value); + + cfgx.u64 = 0; + cfgx.s.tx_oe = 1; + + cvmx_write_csr(gpio->register_base + bit_cfg_reg(offset), cfgx.u64); + return 0; +} + +static int octeon_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip); + u64 read_bits = cvmx_read_csr(gpio->register_base + RX_DAT); + + return ((1ull << offset) & read_bits) != 0; +} + +static int octeon_gpio_probe(struct platform_device *pdev) +{ + struct octeon_gpio *gpio; + struct gpio_chip *chip; + struct resource *res_mem; + int err = 0; + + gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL); + if (!gpio) + return -ENOMEM; + chip = &gpio->chip; + + res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res_mem == NULL) { + dev_err(&pdev->dev, "found no memory resource\n"); + err = -ENXIO; + goto out; + } + if (!devm_request_mem_region(&pdev->dev, res_mem->start, + resource_size(res_mem), + res_mem->name)) { + dev_err(&pdev->dev, "request_mem_region failed\n"); + err = -ENXIO; + goto out; + } + gpio->register_base = (u64)devm_ioremap(&pdev->dev, res_mem->start, + resource_size(res_mem)); + + pdev->dev.platform_data = chip; + chip->label = "octeon-gpio"; + chip->dev = &pdev->dev; + chip->owner = THIS_MODULE; + chip->base = 0; + chip->can_sleep = 0; + chip->ngpio = 20; + chip->direction_input = octeon_gpio_dir_in; + chip->get = octeon_gpio_get; + chip->direction_output = octeon_gpio_dir_out; + chip->set = octeon_gpio_set; + err = gpiochip_add(chip); + if (err) + goto out; + + dev_info(&pdev->dev, "OCTEON GPIO driver probed.\n"); +out: + return err; +} + +static int octeon_gpio_remove(struct platform_device *pdev) +{ + struct gpio_chip *chip = 
pdev->dev.platform_data; + return gpiochip_remove(chip); +} + +static struct of_device_id octeon_gpio_match[] = { + { + .compatible = "cavium,octeon-3860-gpio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, octeon_gpio_match); + +static struct platform_driver octeon_gpio_driver = { + .driver = { + .name = "octeon_gpio", + .owner = THIS_MODULE, + .of_match_table = octeon_gpio_match, + }, + .probe = octeon_gpio_probe, + .remove = octeon_gpio_remove, +}; + +module_platform_driver(octeon_gpio_driver); + +MODULE_DESCRIPTION("Cavium Inc. OCTEON GPIO Driver"); +MODULE_AUTHOR("David Daney"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 796dbb212a41..8492b68e873c 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -177,7 +177,7 @@ uint8_t ast_get_index_reg_mask(struct ast_private *ast, static inline void ast_open_key(struct ast_private *ast) { - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xA1, 0xFF, 0x04); + ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0x80, 0xA8); } #define AST_VIDMEM_SIZE_8M 0x00800000 diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3d13ca6e257f..f6f6cc7fc133 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -416,6 +416,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode) return; /* + * fbdev->blank can be called from irq context in case of a panic. + * Since we already have our own special panic handler which will + * restore the fbdev console mode completely, just bail out early. + */ + if (oops_in_progress) + return; + + /* * For each CRTC in this fb, turn the connectors on/off. 
*/ drm_modeset_lock_all(dev); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9b265a4c6a3d..c27a21034a5e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1676,7 +1676,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return 0; out_gem_unload: - if (dev_priv->mm.inactive_shrinker.shrink) + if (dev_priv->mm.inactive_shrinker.scan_objects) unregister_shrinker(&dev_priv->mm.inactive_shrinker); if (dev->pdev->msi_enabled) @@ -1715,7 +1715,7 @@ int i915_driver_unload(struct drm_device *dev) i915_teardown_sysfs(dev); - if (dev_priv->mm.inactive_shrinker.shrink) + if (dev_priv->mm.inactive_shrinker.scan_objects) unregister_shrinker(&dev_priv->mm.inactive_shrinker); mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9e337feef14..8507c6d1e642 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, struct drm_i915_fence_reg *fence, bool enable); -static int i915_gem_inactive_shrink(struct shrinker *shrinker, - struct shrink_control *sc); +static unsigned long i915_gem_inactive_count(struct shrinker *shrinker, + struct shrink_control *sc); +static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker, + struct shrink_control *sc); static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); -static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); +static long i915_gem_shrink_all(struct drm_i915_private *dev_priv); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); static bool cpu_cache_is_coherent(struct drm_device *dev, @@ -1769,16 +1771,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target) return __i915_gem_shrink(dev_priv, target, true); } -static void +static long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { 
struct drm_i915_gem_object *obj, *next; + long freed = 0; i915_gem_evict_everything(dev_priv->dev); list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, - global_list) + global_list) { + if (obj->pages_pin_count == 0) + freed += obj->base.size >> PAGE_SHIFT; i915_gem_object_put_pages(obj); + } + return freed; } static int @@ -4558,7 +4565,8 @@ i915_gem_load(struct drm_device *dev) dev_priv->mm.interruptible = true; - dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; + dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan; + dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count; dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&dev_priv->mm.inactive_shrinker); } @@ -4781,8 +4789,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } -static int -i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) +static unsigned long +i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, @@ -4790,45 +4798,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) mm.inactive_shrinker); struct drm_device *dev = dev_priv->dev; struct drm_i915_gem_object *obj; - int nr_to_scan = sc->nr_to_scan; bool unlock = true; - int cnt; + unsigned long count; if (!mutex_trylock(&dev->struct_mutex)) { if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return 0; + return SHRINK_STOP; if (dev_priv->mm.shrinker_no_lock_stealing) - return 0; + return SHRINK_STOP; unlock = false; } - if (nr_to_scan) { - nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); - if (nr_to_scan > 0) - nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, - false); - if (nr_to_scan > 0) - i915_gem_shrink_all(dev_priv); - } - - cnt = 0; + count = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) if (obj->pages_pin_count == 0) - cnt += obj->base.size >> 
PAGE_SHIFT; + count += obj->base.size >> PAGE_SHIFT; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { if (obj->active) continue; if (obj->pin_count == 0 && obj->pages_pin_count == 0) - cnt += obj->base.size >> PAGE_SHIFT; + count += obj->base.size >> PAGE_SHIFT; } if (unlock) mutex_unlock(&dev->struct_mutex); - return cnt; + return count; } /* All the new VM stuff */ @@ -4892,6 +4890,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, return 0; } +static unsigned long +i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct drm_i915_private *dev_priv = + container_of(shrinker, + struct drm_i915_private, + mm.inactive_shrinker); + struct drm_device *dev = dev_priv->dev; + int nr_to_scan = sc->nr_to_scan; + unsigned long freed; + bool unlock = true; + + if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_is_locked_by(&dev->struct_mutex, current)) + return 0; + + if (dev_priv->mm.shrinker_no_lock_stealing) + return 0; + + unlock = false; + } + + freed = i915_gem_purge(dev_priv, nr_to_scan); + if (freed < nr_to_scan) + freed += __i915_gem_shrink(dev_priv, nr_to_scan, + false); + if (freed < nr_to_scan) + freed += i915_gem_shrink_all(dev_priv); + + if (unlock) + mutex_unlock(&dev->struct_mutex); + return freed; +} + struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c index 2e11ea02cf87..57cda2a1437b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c @@ -579,8 +579,22 @@ static void init_reserved(struct nvbios_init *init) { u8 opcode = nv_ro08(init->bios, init->offset); - trace("RESERVED\t0x%02x\n", opcode); - init->offset += 1; + u8 length, i; + + switch (opcode) { + case 0xaa: + length = 4; + break; + default: + length = 1; + break; + } + + trace("RESERVED 0x%02x\t", opcode); + 
for (i = 1; i < length; i++) + cont(" 0x%02x", nv_ro08(init->bios, init->offset + i)); + cont("\n"); + init->offset += length; } /** @@ -1437,7 +1451,7 @@ init_configure_mem(struct nvbios_init *init) data = init_rdvgai(init, 0x03c4, 0x01); init_wrvgai(init, 0x03c4, 0x01, data | 0x20); - while ((addr = nv_ro32(bios, sdata)) != 0xffffffff) { + for (; (addr = nv_ro32(bios, sdata)) != 0xffffffff; sdata += 4) { switch (addr) { case 0x10021c: /* CKE_NORMAL */ case 0x1002d0: /* CMD_REFRESH */ @@ -2135,6 +2149,7 @@ static struct nvbios_init_opcode { [0x99] = { init_zm_auxch }, [0x9a] = { init_i2c_long_if }, [0xa9] = { init_gpio_ne }, + [0xaa] = { init_reserved }, }; #define init_opcode_nr (sizeof(init_opcode) / sizeof(init_opcode[0])) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index d2712e6e5d31..7848590f5568 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -278,7 +278,6 @@ nouveau_display_create(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_display *disp; - u32 pclass = dev->pdev->class >> 8; int ret, gen; disp = drm->display = kzalloc(sizeof(*disp), GFP_KERNEL); @@ -340,29 +339,25 @@ nouveau_display_create(struct drm_device *dev) drm_kms_helper_poll_init(dev); drm_kms_helper_poll_disable(dev); - if (nouveau_modeset == 1 || - (nouveau_modeset < 0 && pclass == PCI_CLASS_DISPLAY_VGA)) { - if (drm->vbios.dcb.entries) { - if (nv_device(drm->device)->card_type < NV_50) - ret = nv04_display_create(dev); - else - ret = nv50_display_create(dev); - } else { - ret = 0; - } - - if (ret) - goto disp_create_err; + if (drm->vbios.dcb.entries) { + if (nv_device(drm->device)->card_type < NV_50) + ret = nv04_display_create(dev); + else + ret = nv50_display_create(dev); + } else { + ret = 0; + } - if (dev->mode_config.num_crtc) { - ret = drm_vblank_init(dev, dev->mode_config.num_crtc); - if (ret) - goto vblank_err; - } + if (ret) + goto 
disp_create_err; - nouveau_backlight_init(dev); + if (dev->mode_config.num_crtc) { + ret = drm_vblank_init(dev, dev->mode_config.num_crtc); + if (ret) + goto vblank_err; } + nouveau_backlight_init(dev); return 0; vblank_err: diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 8f6d63d7edd3..a86ecf65c164 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -454,7 +454,8 @@ nouveau_fbcon_init(struct drm_device *dev) int preferred_bpp; int ret; - if (!dev->mode_config.num_crtc) + if (!dev->mode_config.num_crtc || + (dev->pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) return 0; fbcon = kzalloc(sizeof(struct nouveau_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c index ca5492ac2da5..0843ebc910d4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c @@ -104,9 +104,7 @@ nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev, else nvbe->ttm.ttm.func = &nv50_sgdma_backend; - if (ttm_dma_tt_init(&nvbe->ttm, bdev, size, page_flags, dummy_read_page)) { - kfree(nvbe); + if (ttm_dma_tt_init(&nvbe->ttm, bdev, size, page_flags, dummy_read_page)) return NULL; - } return &nvbe->ttm.ttm; } diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index dfac7965ea28..32923d2f6002 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -707,8 +707,9 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */ - if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio) + if ((radeon_connector->audio == RADEON_AUDIO_ENABLE) || + (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO))) return 
ATOM_ENCODER_MODE_HDMI; else if (radeon_connector->use_digital) return ATOM_ENCODER_MODE_DVI; @@ -718,8 +719,9 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: default: - if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio) + if ((radeon_connector->audio == RADEON_AUDIO_ENABLE) || + (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO))) return ATOM_ENCODER_MODE_HDMI; else return ATOM_ENCODER_MODE_DVI; @@ -732,8 +734,9 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) return ATOM_ENCODER_MODE_DP; - else if (drm_detect_hdmi_monitor(radeon_connector->edid) && - radeon_audio) + else if ((radeon_connector->audio == RADEON_AUDIO_ENABLE) || + (drm_detect_hdmi_monitor(radeon_connector->edid) && + (radeon_connector->audio == RADEON_AUDIO_AUTO))) return ATOM_ENCODER_MODE_HDMI; else return ATOM_ENCODER_MODE_DVI; @@ -1647,8 +1650,12 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0); atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); - /* some early dce3.2 boards have a bug in their transmitter control table */ - if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730)) + /* some dce3.x boards have a bug in their transmitter control table. + * ACTION_ENABLE_OUTPUT can probably be dropped since ACTION_ENABLE + * does the same thing and more. 
+ */ + if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730) && + (rdev->family != CHIP_RS880)) atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index 084e69414fd1..05ff315e8e9e 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -2340,12 +2340,6 @@ int btc_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 3cce533397c6..899627443030 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -4748,12 +4748,6 @@ int ci_dpm_set_power_state(struct radeon_device *rdev) if (pi->pcie_performance_request) ci_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); - ret = ci_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("ci_dpm_force_performance_level failed\n"); - return ret; - } - cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX | RADEON_CG_BLOCK_MC | RADEON_CG_BLOCK_SDMA | diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c index 53b43dd3cf1e..252e10a41cf5 100644 --- a/drivers/gpu/drm/radeon/ci_smc.c +++ b/drivers/gpu/drm/radeon/ci_smc.c @@ -47,10 +47,11 @@ int ci_copy_bytes_to_smc(struct radeon_device *rdev, u32 smc_start_address, const u8 *src, u32 byte_count, u32 limit) { + unsigned long flags; u32 data, original_data; u32 addr; u32 extra_shift; - int ret; + int ret = 0; if (smc_start_address & 3) return -EINVAL; @@ -59,13 +60,14 @@ int ci_copy_bytes_to_smc(struct radeon_device *rdev, addr = smc_start_address; + spin_lock_irqsave(&rdev->smc_idx_lock, 
flags); while (byte_count >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; ret = ci_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_IND_DATA_0, data); @@ -80,7 +82,7 @@ int ci_copy_bytes_to_smc(struct radeon_device *rdev, ret = ci_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; original_data = RREG32(SMC_IND_DATA_0); @@ -97,11 +99,15 @@ int ci_copy_bytes_to_smc(struct radeon_device *rdev, ret = ci_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_IND_DATA_0, data); } - return 0; + +done: + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); + + return ret; } void ci_start_smc(struct radeon_device *rdev) @@ -197,6 +203,7 @@ PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev) int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) { + unsigned long flags; u32 ucode_start_address; u32 ucode_size; const u8 *src; @@ -219,6 +226,7 @@ int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) return -EINVAL; src = (const u8 *)rdev->smc_fw->data; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); WREG32(SMC_IND_INDEX_0, ucode_start_address); WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); while (ucode_size >= 4) { @@ -231,6 +239,7 @@ int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) ucode_size -= 4; } WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); return 0; } @@ -238,25 +247,29 @@ int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit) int ci_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, u32 *value, u32 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = ci_set_smc_sram_address(rdev, smc_address, limit); - if (ret) - return ret; + if (ret == 0) + *value = RREG32(SMC_IND_DATA_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, 
flags); - *value = RREG32(SMC_IND_DATA_0); - return 0; + return ret; } int ci_write_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, u32 value, u32 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = ci_set_smc_sram_address(rdev, smc_address, limit); - if (ret) - return ret; + if (ret == 0) + WREG32(SMC_IND_DATA_0, value); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - WREG32(SMC_IND_DATA_0, value); - return 0; + return ret; } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a3bba0587276..adbdb6503b05 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -77,6 +77,8 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); static void cik_init_pg(struct radeon_device *rdev); static void cik_init_cg(struct radeon_device *rdev); +static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable); /* get temperature in millidegrees */ int ci_get_temp(struct radeon_device *rdev) @@ -120,20 +122,27 @@ int kv_get_temp(struct radeon_device *rdev) */ u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->pciep_idx_lock, flags); WREG32(PCIE_INDEX, reg); (void)RREG32(PCIE_INDEX); r = RREG32(PCIE_DATA); + spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags); return r; } void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->pciep_idx_lock, flags); WREG32(PCIE_INDEX, reg); (void)RREG32(PCIE_INDEX); WREG32(PCIE_DATA, v); (void)RREG32(PCIE_DATA); + spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags); } static const u32 spectre_rlc_save_restore_register_list[] = @@ -2722,7 +2731,8 @@ static void cik_gpu_init(struct radeon_device *rdev) } else if ((rdev->pdev->device == 0x1309) || (rdev->pdev->device == 0x130A) || (rdev->pdev->device == 0x130D) || - 
(rdev->pdev->device == 0x1313)) { + (rdev->pdev->device == 0x1313) || + (rdev->pdev->device == 0x131D)) { rdev->config.cik.max_cu_per_sh = 6; rdev->config.cik.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x1306) || @@ -4013,6 +4023,8 @@ static int cik_cp_resume(struct radeon_device *rdev) { int r; + cik_enable_gui_idle_interrupt(rdev, false); + r = cik_cp_load_microcode(rdev); if (r) return r; @@ -4024,6 +4036,8 @@ static int cik_cp_resume(struct radeon_device *rdev) if (r) return r; + cik_enable_gui_idle_interrupt(rdev, true); + return 0; } @@ -5376,7 +5390,9 @@ static void cik_enable_hdp_ls(struct radeon_device *rdev, void cik_update_cg(struct radeon_device *rdev, u32 block, bool enable) { + if (block & RADEON_CG_BLOCK_GFX) { + cik_enable_gui_idle_interrupt(rdev, false); /* order matters! */ if (enable) { cik_enable_mgcg(rdev, true); @@ -5385,6 +5401,7 @@ void cik_update_cg(struct radeon_device *rdev, cik_enable_cgcg(rdev, false); cik_enable_mgcg(rdev, false); } + cik_enable_gui_idle_interrupt(rdev, true); } if (block & RADEON_CG_BLOCK_MC) { @@ -5541,7 +5558,7 @@ static void cik_enable_gfx_cgpg(struct radeon_device *rdev, { u32 data, orig; - if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) { + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { orig = data = RREG32(RLC_PG_CNTL); data |= GFX_PG_ENABLE; if (orig != data) @@ -5805,7 +5822,7 @@ static void cik_init_pg(struct radeon_device *rdev) if (rdev->pg_flags) { cik_enable_sck_slowdown_on_pu(rdev, true); cik_enable_sck_slowdown_on_pd(rdev, true); - if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) { + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { cik_init_gfx_cgpg(rdev); cik_enable_cp_pg(rdev, true); cik_enable_gds_pg(rdev, true); @@ -5819,7 +5836,7 @@ static void cik_fini_pg(struct radeon_device *rdev) { if (rdev->pg_flags) { cik_update_gfx_pg(rdev, false); - if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) { + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { 
cik_enable_cp_pg(rdev, false); cik_enable_gds_pg(rdev, false); } @@ -5895,7 +5912,9 @@ static void cik_disable_interrupt_state(struct radeon_device *rdev) u32 tmp; /* gfx ring */ - WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); /* sdma */ tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp); @@ -6036,8 +6055,7 @@ static int cik_irq_init(struct radeon_device *rdev) */ int cik_irq_set(struct radeon_device *rdev) { - u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE | - PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; + u32 cp_int_cntl; u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3; u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; @@ -6058,6 +6076,10 @@ int cik_irq_set(struct radeon_device *rdev) return 0; } + cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE; + hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN; diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index 95a66db08d9b..91bb470de0a3 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -2014,12 +2014,6 @@ int cypress_dpm_set_power_state(struct radeon_device *rdev) if (eg_pi->pcie_performance_request) cypress_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 
8953255e894b..85a69d2ea3d2 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -28,22 +28,30 @@ static u32 dce6_endpoint_rreg(struct radeon_device *rdev, u32 block_offset, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->end_idx_lock, flags); WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); r = RREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset); + spin_unlock_irqrestore(&rdev->end_idx_lock, flags); + return r; } static void dce6_endpoint_wreg(struct radeon_device *rdev, u32 block_offset, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->end_idx_lock, flags); if (ASIC_IS_DCE8(rdev)) WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); else WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, AZ_ENDPOINT_REG_WRITE_EN | AZ_ENDPOINT_REG_INDEX(reg)); WREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset, v); + spin_unlock_irqrestore(&rdev->end_idx_lock, flags); } #define RREG32_ENDPOINT(block, reg) dce6_endpoint_rreg(rdev, (block), (reg)) @@ -86,12 +94,12 @@ void dce6_afmt_select_pin(struct drm_encoder *encoder) struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; u32 offset = dig->afmt->offset; - u32 id = dig->afmt->pin->id; if (!dig->afmt->pin) return; - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, AFMT_AUDIO_SRC_SELECT(id)); + WREG32(AFMT_AUDIO_SRC_CONTROL + offset, + AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); } void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index ecd60809db4e..71399065db04 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -40,6 +40,7 @@ static int kv_calculate_dpm_settings(struct radeon_device *rdev); static void kv_enable_new_levels(struct radeon_device *rdev); static void kv_program_nbps_index_settings(struct radeon_device *rdev, struct radeon_ps *new_rps); 
+static int kv_set_enabled_level(struct radeon_device *rdev, u32 level); static int kv_set_enabled_levels(struct radeon_device *rdev); static int kv_force_dpm_highest(struct radeon_device *rdev); static int kv_force_dpm_lowest(struct radeon_device *rdev); @@ -519,7 +520,7 @@ static int kv_set_dpm_boot_state(struct radeon_device *rdev) static void kv_program_vc(struct radeon_device *rdev) { - WREG32_SMC(CG_FTV_0, 0x3FFFC000); + WREG32_SMC(CG_FTV_0, 0x3FFFC100); } static void kv_clear_vc(struct radeon_device *rdev) @@ -638,7 +639,10 @@ static int kv_force_lowest_valid(struct radeon_device *rdev) static int kv_unforce_levels(struct radeon_device *rdev) { - return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel); + if (rdev->family == CHIP_KABINI) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel); + else + return kv_set_enabled_levels(rdev); } static int kv_update_sclk_t(struct radeon_device *rdev) @@ -667,9 +671,8 @@ static int kv_program_bootup_state(struct radeon_device *rdev) &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk; if (table && table->count) { - for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { - if ((table->entries[i].clk == pi->boot_pl.sclk) || - (i == 0)) + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].clk == pi->boot_pl.sclk) break; } @@ -682,9 +685,8 @@ static int kv_program_bootup_state(struct radeon_device *rdev) if (table->num_max_dpm_entries == 0) return -EINVAL; - for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { - if ((table->entries[i].sclk_frequency == pi->boot_pl.sclk) || - (i == 0)) + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].sclk_frequency == pi->boot_pl.sclk) break; } @@ -1078,6 +1080,13 @@ static int kv_enable_ulv(struct radeon_device *rdev, bool enable) PPSMC_MSG_EnableULV : PPSMC_MSG_DisableULV); } +static void kv_reset_acp_boot_level(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + + 
pi->acp_boot_level = 0xff; +} + static void kv_update_current_ps(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -1100,6 +1109,18 @@ static void kv_update_requested_ps(struct radeon_device *rdev, pi->requested_rps.ps_priv = &pi->requested_ps; } +void kv_dpm_enable_bapm(struct radeon_device *rdev, bool enable) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + int ret; + + if (pi->bapm_enable) { + ret = kv_smc_bapm_enable(rdev, enable); + if (ret) + DRM_ERROR("kv_smc_bapm_enable failed\n"); + } +} + int kv_dpm_enable(struct radeon_device *rdev) { struct kv_power_info *pi = kv_get_pi(rdev); @@ -1192,6 +1213,8 @@ int kv_dpm_enable(struct radeon_device *rdev) return ret; } + kv_reset_acp_boot_level(rdev); + if (rdev->irq.installed && r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) { ret = kv_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); @@ -1203,6 +1226,12 @@ int kv_dpm_enable(struct radeon_device *rdev) radeon_irq_set(rdev); } + ret = kv_smc_bapm_enable(rdev, false); + if (ret) { + DRM_ERROR("kv_smc_bapm_enable failed\n"); + return ret; + } + /* powerdown unused blocks for now */ kv_dpm_powergate_acp(rdev, true); kv_dpm_powergate_samu(rdev, true); @@ -1226,6 +1255,8 @@ void kv_dpm_disable(struct radeon_device *rdev) RADEON_CG_BLOCK_BIF | RADEON_CG_BLOCK_HDP), false); + kv_smc_bapm_enable(rdev, false); + /* powerup blocks */ kv_dpm_powergate_acp(rdev, false); kv_dpm_powergate_samu(rdev, false); @@ -1450,6 +1481,39 @@ static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) return kv_enable_samu_dpm(rdev, !gate); } +static u8 kv_get_acp_boot_level(struct radeon_device *rdev) +{ + u8 i; + struct radeon_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; + + for (i = 0; i < table->count; i++) { + if (table->entries[i].clk >= 0) /* XXX */ + break; + } + + if (i >= table->count) + i = table->count - 1; + + return i; +} + +static void 
kv_update_acp_boot_level(struct radeon_device *rdev) +{ + struct kv_power_info *pi = kv_get_pi(rdev); + u8 acp_boot_level; + + if (!pi->caps_stable_p_state) { + acp_boot_level = kv_get_acp_boot_level(rdev); + if (acp_boot_level != pi->acp_boot_level) { + pi->acp_boot_level = acp_boot_level; + kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_ACPDPM_SetEnabledMask, + (1 << pi->acp_boot_level)); + } + } +} + static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate) { struct kv_power_info *pi = kv_get_pi(rdev); @@ -1461,7 +1525,7 @@ static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate) if (pi->caps_stable_p_state) pi->acp_boot_level = table->count - 1; else - pi->acp_boot_level = 0; + pi->acp_boot_level = kv_get_acp_boot_level(rdev); ret = kv_copy_bytes_to_smc(rdev, pi->dpm_table_start + @@ -1588,13 +1652,11 @@ static void kv_set_valid_clock_range(struct radeon_device *rdev, } } - for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { - if ((table->entries[i].clk <= new_ps->levels[new_ps->num_levels -1].sclk) || - (i == 0)) { - pi->highest_valid = i; + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { + if (table->entries[i].clk <= new_ps->levels[new_ps->num_levels - 1].sclk) break; - } } + pi->highest_valid = i; if (pi->lowest_valid > pi->highest_valid) { if ((new_ps->levels[0].sclk - table->entries[pi->highest_valid].clk) > @@ -1615,14 +1677,12 @@ static void kv_set_valid_clock_range(struct radeon_device *rdev, } } - for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) { + for (i = pi->graphics_dpm_level_count - 1; i > 0; i--) { if (table->entries[i].sclk_frequency <= - new_ps->levels[new_ps->num_levels - 1].sclk || - i == 0) { - pi->highest_valid = i; + new_ps->levels[new_ps->num_levels - 1].sclk) break; - } } + pi->highest_valid = i; if (pi->lowest_valid > pi->highest_valid) { if ((new_ps->levels[0].sclk - @@ -1724,6 +1784,14 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) RADEON_CG_BLOCK_BIF | RADEON_CG_BLOCK_HDP), 
false); + if (pi->bapm_enable) { + ret = kv_smc_bapm_enable(rdev, rdev->pm.dpm.ac_power); + if (ret) { + DRM_ERROR("kv_smc_bapm_enable failed\n"); + return ret; + } + } + if (rdev->family == CHIP_KABINI) { if (pi->enable_dpm) { kv_set_valid_clock_range(rdev, new_ps); @@ -1775,6 +1843,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) return ret; } #endif + kv_update_acp_boot_level(rdev); kv_update_sclk_t(rdev); kv_enable_nb_dpm(rdev); } @@ -1785,7 +1854,6 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) RADEON_CG_BLOCK_BIF | RADEON_CG_BLOCK_HDP), true); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; return 0; } @@ -1806,12 +1874,23 @@ void kv_dpm_setup_asic(struct radeon_device *rdev) void kv_dpm_reset_asic(struct radeon_device *rdev) { - kv_force_lowest_valid(rdev); - kv_init_graphics_levels(rdev); - kv_program_bootup_state(rdev); - kv_upload_dpm_settings(rdev); - kv_force_lowest_valid(rdev); - kv_unforce_levels(rdev); + struct kv_power_info *pi = kv_get_pi(rdev); + + if (rdev->family == CHIP_KABINI) { + kv_force_lowest_valid(rdev); + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); + kv_upload_dpm_settings(rdev); + kv_force_lowest_valid(rdev); + kv_unforce_levels(rdev); + } else { + kv_init_graphics_levels(rdev); + kv_program_bootup_state(rdev); + kv_freeze_sclk_dpm(rdev, true); + kv_upload_dpm_settings(rdev); + kv_freeze_sclk_dpm(rdev, false); + kv_set_enabled_level(rdev, pi->graphics_boot_level); + } } //XXX use sumo_dpm_display_configuration_changed @@ -1871,12 +1950,15 @@ static int kv_force_dpm_highest(struct radeon_device *rdev) if (ret) return ret; - for (i = SMU7_MAX_LEVELS_GRAPHICS - 1; i >= 0; i--) { + for (i = SMU7_MAX_LEVELS_GRAPHICS - 1; i > 0; i--) { if (enable_mask & (1 << i)) break; } - return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + if (rdev->family == CHIP_KABINI) + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + else + return 
kv_set_enabled_level(rdev, i); } static int kv_force_dpm_lowest(struct radeon_device *rdev) @@ -1893,7 +1975,10 @@ static int kv_force_dpm_lowest(struct radeon_device *rdev) break; } - return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + if (rdev->family == CHIP_KABINI) + return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i); + else + return kv_set_enabled_level(rdev, i); } static u8 kv_get_sleep_divider_id_from_clock(struct radeon_device *rdev, @@ -1911,9 +1996,9 @@ static u8 kv_get_sleep_divider_id_from_clock(struct radeon_device *rdev, if (!pi->caps_sclk_ds) return 0; - for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i <= 0; i--) { + for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i > 0; i--) { temp = sclk / sumo_get_sleep_divider_from_id(i); - if ((temp >= min) || (i == 0)) + if (temp >= min) break; } @@ -2039,12 +2124,12 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, ps->dpmx_nb_ps_lo = 0x1; ps->dpmx_nb_ps_hi = 0x0; } else { - ps->dpm0_pg_nb_ps_lo = 0x1; + ps->dpm0_pg_nb_ps_lo = 0x3; ps->dpm0_pg_nb_ps_hi = 0x0; - ps->dpmx_nb_ps_lo = 0x2; - ps->dpmx_nb_ps_hi = 0x1; + ps->dpmx_nb_ps_lo = 0x3; + ps->dpmx_nb_ps_hi = 0x0; - if (pi->sys_info.nb_dpm_enable && pi->battery_state) { + if (pi->sys_info.nb_dpm_enable) { force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) || pi->video_start || (rdev->pm.dpm.new_active_crtc_count >= 3) || pi->disable_nb_ps3_in_battery; @@ -2210,6 +2295,15 @@ static void kv_enable_new_levels(struct radeon_device *rdev) } } +static int kv_set_enabled_level(struct radeon_device *rdev, u32 level) +{ + u32 new_mask = (1 << level); + + return kv_send_msg_to_smc_with_parameter(rdev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + new_mask); +} + static int kv_set_enabled_levels(struct radeon_device *rdev) { struct kv_power_info *pi = kv_get_pi(rdev); diff --git a/drivers/gpu/drm/radeon/kv_dpm.h b/drivers/gpu/drm/radeon/kv_dpm.h index 32bb079572d7..8cef7525d7a8 100644 --- 
a/drivers/gpu/drm/radeon/kv_dpm.h +++ b/drivers/gpu/drm/radeon/kv_dpm.h @@ -192,6 +192,7 @@ int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev, int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, u32 *value, u32 limit); int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable); +int kv_smc_bapm_enable(struct radeon_device *rdev, bool enable); int kv_copy_bytes_to_smc(struct radeon_device *rdev, u32 smc_start_address, const u8 *src, u32 byte_count, u32 limit); diff --git a/drivers/gpu/drm/radeon/kv_smc.c b/drivers/gpu/drm/radeon/kv_smc.c index 34a226d7e34a..0000b59a6d05 100644 --- a/drivers/gpu/drm/radeon/kv_smc.c +++ b/drivers/gpu/drm/radeon/kv_smc.c @@ -107,6 +107,14 @@ int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable) return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Disable); } +int kv_smc_bapm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return kv_notify_message_to_smu(rdev, PPSMC_MSG_EnableBAPM); + else + return kv_notify_message_to_smu(rdev, PPSMC_MSG_DisableBAPM); +} + int kv_copy_bytes_to_smc(struct radeon_device *rdev, u32 smc_start_address, const u8 *src, u32 byte_count, u32 limit) diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index f7b625c9e0e9..6c398a456d78 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -3865,12 +3865,6 @@ int ni_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = ni_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("ni_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h index 682842804bce..5670b8291285 100644 --- a/drivers/gpu/drm/radeon/ppsmc.h +++ b/drivers/gpu/drm/radeon/ppsmc.h @@ -163,6 +163,8 @@ typedef uint8_t PPSMC_Result; #define PPSMC_MSG_VCEPowerON ((uint32_t) 0x10f) #define PPSMC_MSG_DCE_RemoveVoltageAdjustment ((uint32_t) 
0x11d) #define PPSMC_MSG_DCE_AllowVoltageAdjustment ((uint32_t) 0x11e) +#define PPSMC_MSG_EnableBAPM ((uint32_t) 0x120) +#define PPSMC_MSG_DisableBAPM ((uint32_t) 0x121) #define PPSMC_MSG_UVD_DPM_Config ((uint32_t) 0x124) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 9fc61dd68bc0..24175717307b 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2853,21 +2853,28 @@ static void r100_pll_errata_after_data(struct radeon_device *rdev) uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t data; + spin_lock_irqsave(&rdev->pll_idx_lock, flags); WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); r100_pll_errata_after_index(rdev); data = RREG32(RADEON_CLOCK_CNTL_DATA); r100_pll_errata_after_data(rdev); + spin_unlock_irqrestore(&rdev->pll_idx_lock, flags); return data; } void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->pll_idx_lock, flags); WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); r100_pll_errata_after_index(rdev); WREG32(RADEON_CLOCK_CNTL_DATA, v); r100_pll_errata_after_data(rdev); + spin_unlock_irqrestore(&rdev->pll_idx_lock, flags); } static void r100_set_safe_registers(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 4e796ecf9ea4..6edf2b3a52b4 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -160,18 +160,25 @@ void r420_pipes_init(struct radeon_device *rdev) u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg)); r = RREG32(R_0001FC_MC_IND_DATA); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); return r; } void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, 
flags); WREG32(R_0001F8_MC_IND_INDEX, S_0001F8_MC_IND_ADDR(reg) | S_0001F8_MC_IND_WR_EN(1)); WREG32(R_0001FC_MC_IND_DATA, v); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } static void r420_debugfs(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ea4d3734e6d9..2a1b1876b431 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -119,6 +119,11 @@ u32 r600_get_xclk(struct radeon_device *rdev) return rdev->clock.spll.reference_freq; } +int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) +{ + return 0; +} + /* get temperature in millidegrees */ int rv6xx_get_temp(struct radeon_device *rdev) { @@ -1045,20 +1050,27 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev) uint32_t rs780_mc_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t r; + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_0028F8_MC_INDEX, S_0028F8_MC_IND_ADDR(reg)); r = RREG32(R_0028FC_MC_DATA); WREG32(R_0028F8_MC_INDEX, ~C_0028F8_MC_IND_ADDR); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); return r; } void rs780_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_0028F8_MC_INDEX, S_0028F8_MC_IND_ADDR(reg) | S_0028F8_MC_IND_WR_EN(1)); WREG32(R_0028FC_MC_DATA, v); WREG32(R_0028F8_MC_INDEX, 0x7F); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } static void r600_mc_program(struct radeon_device *rdev) @@ -2092,20 +2104,27 @@ static void r600_gpu_init(struct radeon_device *rdev) */ u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->pciep_idx_lock, flags); WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); (void)RREG32(PCIE_PORT_INDEX); r = RREG32(PCIE_PORT_DATA); + spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags); return r; } void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + 
unsigned long flags; + + spin_lock_irqsave(&rdev->pciep_idx_lock, flags); WREG32(PCIE_PORT_INDEX, ((reg) & 0xff)); (void)RREG32(PCIE_PORT_INDEX); WREG32(PCIE_PORT_DATA, (v)); (void)RREG32(PCIE_PORT_DATA); + spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags); } /* diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index fa0de46fcc0d..e65f211a7be0 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1219,30 +1219,20 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) void r600_free_extended_power_table(struct radeon_device *rdev) { - if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries); - if (rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries); - if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) - kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries); - if (rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries) - kfree(rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries); - if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries) - kfree(rdev->pm.dpm.dyn_state.cac_leakage_table.entries); - if (rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) - kfree(rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries); - if (rdev->pm.dpm.dyn_state.ppm_table) - kfree(rdev->pm.dpm.dyn_state.ppm_table); - if (rdev->pm.dpm.dyn_state.cac_tdp_table) - kfree(rdev->pm.dpm.dyn_state.cac_tdp_table); - if (rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) - kfree(rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries); - if (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) - kfree(rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries); - if (rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) - 
kfree(rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries); - if (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) - kfree(rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries); + struct radeon_dpm_dynamic_state *dyn_state = &rdev->pm.dpm.dyn_state; + + kfree(dyn_state->vddc_dependency_on_sclk.entries); + kfree(dyn_state->vddci_dependency_on_mclk.entries); + kfree(dyn_state->vddc_dependency_on_mclk.entries); + kfree(dyn_state->mvdd_dependency_on_mclk.entries); + kfree(dyn_state->cac_leakage_table.entries); + kfree(dyn_state->phase_shedding_limits_table.entries); + kfree(dyn_state->ppm_table); + kfree(dyn_state->cac_tdp_table); + kfree(dyn_state->vce_clock_voltage_dependency_table.entries); + kfree(dyn_state->uvd_clock_voltage_dependency_table.entries); + kfree(dyn_state->samu_clock_voltage_dependency_table.entries); + kfree(dyn_state->acp_clock_voltage_dependency_table.entries); } enum radeon_pcie_gen r600_get_pcie_gen_support(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 454f90a849e4..e673fe26ea84 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1040,7 +1040,7 @@ # define HDMI0_AVI_INFO_CONT (1 << 1) # define HDMI0_AUDIO_INFO_SEND (1 << 4) # define HDMI0_AUDIO_INFO_CONT (1 << 5) -# define HDMI0_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hmdi regs */ +# define HDMI0_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hdmi regs */ # define HDMI0_AUDIO_INFO_UPDATE (1 << 7) # define HDMI0_MPEG_INFO_SEND (1 << 8) # define HDMI0_MPEG_INFO_CONT (1 << 9) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ff8b564ce2b2..a400ac1c4147 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -181,7 +181,7 @@ extern int radeon_aspm; #define RADEON_CG_SUPPORT_HDP_MGCG (1 << 16) /* PG flags */ -#define RADEON_PG_SUPPORT_GFX_CG (1 << 0) +#define 
RADEON_PG_SUPPORT_GFX_PG (1 << 0) #define RADEON_PG_SUPPORT_GFX_SMG (1 << 1) #define RADEON_PG_SUPPORT_GFX_DMG (1 << 2) #define RADEON_PG_SUPPORT_UVD (1 << 3) @@ -1778,6 +1778,7 @@ struct radeon_asic { int (*force_performance_level)(struct radeon_device *rdev, enum radeon_dpm_forced_level level); bool (*vblank_too_short)(struct radeon_device *rdev); void (*powergate_uvd)(struct radeon_device *rdev, bool gate); + void (*enable_bapm)(struct radeon_device *rdev, bool enable); } dpm; /* pageflipping */ struct { @@ -2110,6 +2111,28 @@ struct radeon_device { resource_size_t rmmio_size; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + /* protects concurrent SMC based register access */ + spinlock_t smc_idx_lock; + /* protects concurrent PLL register access */ + spinlock_t pll_idx_lock; + /* protects concurrent MC register access */ + spinlock_t mc_idx_lock; + /* protects concurrent PCIE register access */ + spinlock_t pcie_idx_lock; + /* protects concurrent PCIE_PORT register access */ + spinlock_t pciep_idx_lock; + /* protects concurrent PIF register access */ + spinlock_t pif_idx_lock; + /* protects concurrent CG register access */ + spinlock_t cg_idx_lock; + /* protects concurrent UVD register access */ + spinlock_t uvd_idx_lock; + /* protects concurrent RCU register access */ + spinlock_t rcu_idx_lock; + /* protects concurrent DIDT register access */ + spinlock_t didt_idx_lock; + /* protects concurrent ENDPOINT (audio) register access */ + spinlock_t end_idx_lock; void __iomem *rmmio; radeon_rreg_t mc_rreg; radeon_wreg_t mc_wreg; @@ -2277,123 +2300,179 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v); */ static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t r; + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); r = RREG32(RADEON_PCIE_DATA); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, 
flags); return r; } static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); WREG32(RADEON_PCIE_DATA, (v)); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); } static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); WREG32(TN_SMC_IND_INDEX_0, (reg)); r = RREG32(TN_SMC_IND_DATA_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); return r; } static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->smc_idx_lock, flags); WREG32(TN_SMC_IND_INDEX_0, (reg)); WREG32(TN_SMC_IND_DATA_0, (v)); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); } static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); r = RREG32(R600_RCU_DATA); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); return r; } static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); WREG32(R600_RCU_DATA, (v)); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); } static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->cg_idx_lock, flags); WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); r = RREG32(EVERGREEN_CG_IND_DATA); + spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); return r; } static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->cg_idx_lock, flags); WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); WREG32(EVERGREEN_CG_IND_DATA, (v)); + 
spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); } static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->pif_idx_lock, flags); WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); r = RREG32(EVERGREEN_PIF_PHY0_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); return r; } static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); } static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->pif_idx_lock, flags); WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); r = RREG32(EVERGREEN_PIF_PHY1_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); return r; } static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); } static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); r = RREG32(R600_UVD_CTX_DATA); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); return r; } static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); WREG32(R600_UVD_CTX_DATA, (v)); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); } static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) { + unsigned long flags; u32 r; + 
spin_lock_irqsave(&rdev->didt_idx_lock, flags); WREG32(CIK_DIDT_IND_INDEX, (reg)); r = RREG32(CIK_DIDT_IND_DATA); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); return r; } static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->didt_idx_lock, flags); WREG32(CIK_DIDT_IND_INDEX, (reg)); WREG32(CIK_DIDT_IND_DATA, (v)); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); } void r100_pll_errata_after_index(struct radeon_device *rdev); @@ -2569,6 +2648,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_dpm_force_performance_level(rdev, l) rdev->asic->dpm.force_performance_level((rdev), (l)) #define radeon_dpm_vblank_too_short(rdev) rdev->asic->dpm.vblank_too_short((rdev)) #define radeon_dpm_powergate_uvd(rdev, g) rdev->asic->dpm.powergate_uvd((rdev), (g)) +#define radeon_dpm_enable_bapm(rdev, e) rdev->asic->dpm.enable_bapm((rdev), (e)) /* Common functions */ /* AGP */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 630853b96841..5003385a7512 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1037,6 +1037,7 @@ static struct radeon_asic rv6xx_asic = { .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = NULL, .get_temperature = &rv6xx_get_temp, + .set_uvd_clocks = &r600_set_uvd_clocks, }, .dpm = { .init = &rv6xx_dpm_init, @@ -1126,6 +1127,7 @@ static struct radeon_asic rs780_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .get_temperature = &rv6xx_get_temp, + .set_uvd_clocks = &r600_set_uvd_clocks, }, .dpm = { .init = &rs780_dpm_init, @@ -1141,6 +1143,7 @@ static struct radeon_asic rs780_asic = { .get_mclk = &rs780_dpm_get_mclk, .print_power_state = &rs780_dpm_print_power_state, .debugfs_print_current_performance_level = &rs780_dpm_debugfs_print_current_performance_level, + .force_performance_level = &rs780_dpm_force_performance_level, }, .pflip = 
{ .pre_page_flip = &rs600_pre_page_flip, @@ -1791,6 +1794,7 @@ static struct radeon_asic trinity_asic = { .print_power_state = &trinity_dpm_print_power_state, .debugfs_print_current_performance_level = &trinity_dpm_debugfs_print_current_performance_level, .force_performance_level = &trinity_dpm_force_performance_level, + .enable_bapm = &trinity_dpm_enable_bapm, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2166,6 +2170,7 @@ static struct radeon_asic kv_asic = { .debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level, .force_performance_level = &kv_dpm_force_performance_level, .powergate_uvd = &kv_dpm_powergate_uvd, + .enable_bapm = &kv_dpm_enable_bapm, }, .pflip = { .pre_page_flip = &evergreen_pre_page_flip, @@ -2390,7 +2395,7 @@ int radeon_asic_init(struct radeon_device *rdev) RADEON_CG_SUPPORT_HDP_LS | RADEON_CG_SUPPORT_HDP_MGCG; rdev->pg_flags = 0 | - /*RADEON_PG_SUPPORT_GFX_CG | */ + /*RADEON_PG_SUPPORT_GFX_PG | */ RADEON_PG_SUPPORT_SDMA; break; case CHIP_OLAND: @@ -2479,7 +2484,7 @@ int radeon_asic_init(struct radeon_device *rdev) RADEON_CG_SUPPORT_HDP_LS | RADEON_CG_SUPPORT_HDP_MGCG; rdev->pg_flags = 0; - /*RADEON_PG_SUPPORT_GFX_CG | + /*RADEON_PG_SUPPORT_GFX_PG | RADEON_PG_SUPPORT_GFX_SMG | RADEON_PG_SUPPORT_GFX_DMG | RADEON_PG_SUPPORT_UVD | @@ -2507,7 +2512,7 @@ int radeon_asic_init(struct radeon_device *rdev) RADEON_CG_SUPPORT_HDP_LS | RADEON_CG_SUPPORT_HDP_MGCG; rdev->pg_flags = 0; - /*RADEON_PG_SUPPORT_GFX_CG | + /*RADEON_PG_SUPPORT_GFX_PG | RADEON_PG_SUPPORT_GFX_SMG | RADEON_PG_SUPPORT_UVD | RADEON_PG_SUPPORT_VCE | diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 818bbe6b884b..70c29d5e080d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -389,6 +389,7 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev); u32 r600_get_xclk(struct radeon_device *rdev); uint64_t r600_get_gpu_clock_counter(struct radeon_device 
*rdev); int rv6xx_get_temp(struct radeon_device *rdev); +int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); int r600_dpm_pre_set_power_state(struct radeon_device *rdev); void r600_dpm_post_set_power_state(struct radeon_device *rdev); /* r600 dma */ @@ -428,6 +429,8 @@ void rs780_dpm_print_power_state(struct radeon_device *rdev, struct radeon_ps *ps); void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, struct seq_file *m); +int rs780_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level); /* * rv770,rv730,rv710,rv740 @@ -625,6 +628,7 @@ void trinity_dpm_debugfs_print_current_performance_level(struct radeon_device *r struct seq_file *m); int trinity_dpm_force_performance_level(struct radeon_device *rdev, enum radeon_dpm_forced_level level); +void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable); /* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev); @@ -781,6 +785,7 @@ void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev, int kv_dpm_force_performance_level(struct radeon_device *rdev, enum radeon_dpm_forced_level level); void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate); +void kv_dpm_enable_bapm(struct radeon_device *rdev, bool enable); /* uvd v1.0 */ uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 2399f25ec037..79159b5da05b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -396,6 +396,21 @@ static int radeon_connector_set_property(struct drm_connector *connector, struct } } + if (property == rdev->mode_info.audio_property) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + /* need to find digital encoder on connector */ + encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); + if (!encoder) + 
return 0; + + radeon_encoder = to_radeon_encoder(encoder); + + if (radeon_connector->audio != val) { + radeon_connector->audio = val; + radeon_property_change_mode(&radeon_encoder->base); + } + } + if (property == rdev->mode_info.underscan_property) { /* need to find digital encoder on connector */ encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); @@ -1420,7 +1435,7 @@ radeon_dp_detect(struct drm_connector *connector, bool force) if (radeon_dp_getdpcd(radeon_connector)) ret = connector_status_connected; } else { - /* try non-aux ddc (DP to DVI/HMDI/etc. adapter) */ + /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */ if (radeon_ddc_probe(radeon_connector, false)) ret = connector_status_connected; } @@ -1489,6 +1504,24 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = { .force = radeon_dvi_force, }; +static const struct drm_connector_funcs radeon_edp_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = radeon_dp_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .set_property = radeon_lvds_set_property, + .destroy = radeon_dp_connector_destroy, + .force = radeon_dvi_force, +}; + +static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = radeon_dp_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .set_property = radeon_lvds_set_property, + .destroy = radeon_dp_connector_destroy, + .force = radeon_dvi_force, +}; + void radeon_add_atom_connector(struct drm_device *dev, uint32_t connector_id, @@ -1580,8 +1613,6 @@ radeon_add_atom_connector(struct drm_device *dev, goto failed; radeon_dig_connector->igp_lane_info = igp_lane_info; radeon_connector->con_priv = radeon_dig_connector; - drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type); - drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs); if (i2c_bus->valid) { /* add DP i2c bus */ if 
(connector_type == DRM_MODE_CONNECTOR_eDP) @@ -1598,6 +1629,10 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: default: + drm_connector_init(dev, &radeon_connector->base, + &radeon_dp_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); connector->interlace_allowed = true; connector->doublescan_allowed = true; radeon_connector->dac_load_detect = true; @@ -1610,6 +1645,10 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: + drm_connector_init(dev, &radeon_connector->base, + &radeon_dp_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_property, UNDERSCAN_OFF); @@ -1619,6 +1658,9 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.underscan_vborder_property, 0); + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + RADEON_AUDIO_DISABLE); subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; if (connector_type == DRM_MODE_CONNECTOR_HDMIB) @@ -1634,6 +1676,10 @@ radeon_add_atom_connector(struct drm_device *dev, break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: + drm_connector_init(dev, &radeon_connector->base, + &radeon_lvds_bridge_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, + &radeon_dp_connector_helper_funcs); drm_object_attach_property(&radeon_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); @@ -1708,6 +1754,11 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev)) { + 
drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + RADEON_AUDIO_DISABLE); + } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; drm_object_attach_property(&radeon_connector->base.base, @@ -1748,6 +1799,11 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev)) { + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + RADEON_AUDIO_DISABLE); + } subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; if (connector_type == DRM_MODE_CONNECTOR_HDMIB) @@ -1787,6 +1843,11 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.underscan_vborder_property, 0); } + if (ASIC_IS_DCE2(rdev)) { + drm_object_attach_property(&radeon_connector->base.base, + rdev->mode_info.audio_property, + RADEON_AUDIO_DISABLE); + } connector->interlace_allowed = true; /* in theory with a DP to VGA converter... 
*/ connector->doublescan_allowed = false; @@ -1797,7 +1858,7 @@ radeon_add_atom_connector(struct drm_device *dev, goto failed; radeon_dig_connector->igp_lane_info = igp_lane_info; radeon_connector->con_priv = radeon_dig_connector; - drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type); + drm_connector_init(dev, &radeon_connector->base, &radeon_edp_connector_funcs, connector_type); drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs); if (i2c_bus->valid) { /* add DP i2c bus */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index a56084410372..ac6ece61a476 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -28,6 +28,7 @@ #include <drm/radeon_drm.h> #include "radeon_reg.h" #include "radeon.h" +#include "radeon_trace.h" static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) { @@ -80,9 +81,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.bo = p->relocs[i].robj; p->relocs[i].lobj.written = !!r->write_domain; - /* the first reloc of an UVD job is the - msg and that must be in VRAM */ - if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) { + /* the first reloc of an UVD job is the msg and that must be in + VRAM, also put everything into VRAM on AGP cards to avoid + image corruptions */ + if (p->ring == R600_RING_TYPE_UVD_INDEX && + (i == 0 || p->rdev->flags & RADEON_IS_AGP)) { /* TODO: is this still needed for NI+ ?
*/ p->relocs[i].lobj.domain = RADEON_GEM_DOMAIN_VRAM; @@ -559,6 +562,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return r; } + trace_radeon_cs(&parser); + r = radeon_cs_ib_chunk(rdev, &parser); if (r) { goto out; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 16cb8792b1e6..e29faa73b574 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1249,6 +1249,17 @@ int radeon_device_init(struct radeon_device *rdev, /* Registers mapping */ /* TODO: block userspace mapping of io register */ spin_lock_init(&rdev->mmio_idx_lock); + spin_lock_init(&rdev->smc_idx_lock); + spin_lock_init(&rdev->pll_idx_lock); + spin_lock_init(&rdev->mc_idx_lock); + spin_lock_init(&rdev->pcie_idx_lock); + spin_lock_init(&rdev->pciep_idx_lock); + spin_lock_init(&rdev->pif_idx_lock); + spin_lock_init(&rdev->cg_idx_lock); + spin_lock_init(&rdev->uvd_idx_lock); + spin_lock_init(&rdev->rcu_idx_lock); + spin_lock_init(&rdev->didt_idx_lock); + spin_lock_init(&rdev->end_idx_lock); if (rdev->family >= CHIP_BONAIRE) { rdev->rmmio_base = pci_resource_start(rdev->pdev, 5); rdev->rmmio_size = pci_resource_len(rdev->pdev, 5); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index b055bddaa94c..0d1aa050d41d 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1172,6 +1172,12 @@ static struct drm_prop_enum_list radeon_underscan_enum_list[] = { UNDERSCAN_AUTO, "auto" }, }; +static struct drm_prop_enum_list radeon_audio_enum_list[] = +{ { RADEON_AUDIO_DISABLE, "off" }, + { RADEON_AUDIO_ENABLE, "on" }, + { RADEON_AUDIO_AUTO, "auto" }, +}; + static int radeon_modeset_create_props(struct radeon_device *rdev) { int sz; @@ -1222,6 +1228,12 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; + sz = 
ARRAY_SIZE(radeon_audio_enum_list); + rdev->mode_info.audio_property = + drm_property_create_enum(rdev->ddev, 0, + "audio", + radeon_audio_enum_list, sz); + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index cb4445f55a96..cdd12dcd988b 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -153,7 +153,7 @@ int radeon_benchmarking = 0; int radeon_testing = 0; int radeon_connector_table = 0; int radeon_tv = 1; -int radeon_audio = 0; +int radeon_audio = 1; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; int radeon_pcie_gen2 = -1; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index d908d8d68f6b..ef63d3f00b2f 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -247,6 +247,8 @@ struct radeon_mode_info { struct drm_property *underscan_property; struct drm_property *underscan_hborder_property; struct drm_property *underscan_vborder_property; + /* audio */ + struct drm_property *audio_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; @@ -471,6 +473,12 @@ struct radeon_router { u8 cd_mux_state; }; +enum radeon_connector_audio { + RADEON_AUDIO_DISABLE = 0, + RADEON_AUDIO_ENABLE = 1, + RADEON_AUDIO_AUTO = 2 +}; + struct radeon_connector { struct drm_connector base; uint32_t connector_id; @@ -489,6 +497,7 @@ struct radeon_connector { struct radeon_hpd hpd; struct radeon_router router; struct radeon_i2c_chan *router_bus; + enum radeon_connector_audio audio; }; struct radeon_framebuffer { diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index d7555369a3e5..87e1d69e8fdb 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -67,7 +67,16 @@ int radeon_pm_get_type_index(struct radeon_device *rdev, void radeon_pm_acpi_event_handler(struct radeon_device *rdev) { - if 
(rdev->pm.pm_method == PM_METHOD_PROFILE) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + mutex_lock(&rdev->pm.mutex); + if (power_supply_is_system_supplied() > 0) + rdev->pm.dpm.ac_power = true; + else + rdev->pm.dpm.ac_power = false; + if (rdev->asic->dpm.enable_bapm) + radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power); + mutex_unlock(&rdev->pm.mutex); + } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) { if (rdev->pm.profile == PM_PROFILE_AUTO) { mutex_lock(&rdev->pm.mutex); radeon_pm_update_profile(rdev); @@ -333,7 +342,7 @@ static ssize_t radeon_get_pm_profile(struct device *dev, struct device_attribute *attr, char *buf) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; int cp = rdev->pm.profile; @@ -349,7 +358,7 @@ static ssize_t radeon_set_pm_profile(struct device *dev, const char *buf, size_t count) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; mutex_lock(&rdev->pm.mutex); @@ -383,7 +392,7 @@ static ssize_t radeon_get_pm_method(struct device *dev, struct device_attribute *attr, char *buf) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; int pm = rdev->pm.pm_method; @@ -397,7 +406,7 @@ static ssize_t radeon_set_pm_method(struct device *dev, const char *buf, size_t count) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; /* we don't support the legacy modes with dpm */ @@ -433,7 +442,7 @@ static ssize_t radeon_get_dpm_state(struct device *dev, struct device_attribute *attr, char *buf) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = 
dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; enum radeon_pm_state_type pm = rdev->pm.dpm.user_state; @@ -447,7 +456,7 @@ static ssize_t radeon_set_dpm_state(struct device *dev, const char *buf, size_t count) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; mutex_lock(&rdev->pm.mutex); @@ -472,7 +481,7 @@ static ssize_t radeon_get_dpm_forced_performance_level(struct device *dev, struct device_attribute *attr, char *buf) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; @@ -486,7 +495,7 @@ static ssize_t radeon_set_dpm_forced_performance_level(struct device *dev, const char *buf, size_t count) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; enum radeon_dpm_forced_level level; int ret = 0; @@ -524,7 +533,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, struct device_attribute *attr, char *buf) { - struct drm_device *ddev = pci_get_drvdata(to_pci_dev(dev)); + struct drm_device *ddev = dev_get_drvdata(dev); struct radeon_device *rdev = ddev->dev_private; int temp; @@ -536,6 +545,23 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t radeon_hwmon_show_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct radeon_device *rdev = ddev->dev_private; + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = rdev->pm.dpm.thermal.min_temp; + else + temp = rdev->pm.dpm.thermal.max_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t 
radeon_hwmon_show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -544,16 +570,37 @@ static ssize_t radeon_hwmon_show_name(struct device *dev, } static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, radeon_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, radeon_hwmon_show_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, radeon_hwmon_show_temp_thresh, NULL, 1); static SENSOR_DEVICE_ATTR(name, S_IRUGO, radeon_hwmon_show_name, NULL, 0); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_crit.dev_attr.attr, + &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, &sensor_dev_attr_name.dev_attr.attr, NULL }; +static umode_t hwmon_attributes_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct radeon_device *rdev = ddev->dev_private; + + /* Skip limit attributes if DPM is not enabled */ + if (rdev->pm.pm_method != PM_METHOD_DPM && + (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + return 0; + + return attr->mode; +} + static const struct attribute_group hwmon_attrgroup = { .attrs = hwmon_attributes, + .is_visible = hwmon_attributes_visible, }; static int radeon_hwmon_init(struct radeon_device *rdev) @@ -870,10 +917,13 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) radeon_dpm_post_set_power_state(rdev); - /* force low perf level for thermal */ - if (rdev->pm.dpm.thermal_active && - rdev->asic->dpm.force_performance_level) { - radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_LOW); + if (rdev->asic->dpm.force_performance_level) { + if (rdev->pm.dpm.thermal_active) + /* force low perf level for thermal */ + radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_LOW); + else + /* otherwise, 
enable auto */ + radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); } done: @@ -1102,9 +1152,10 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) { int ret; - /* default to performance state */ + /* default to balanced state */ rdev->pm.dpm.state = POWER_STATE_TYPE_BALANCED; rdev->pm.dpm.user_state = POWER_STATE_TYPE_BALANCED; + rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; rdev->pm.default_sclk = rdev->clock.default_sclk; rdev->pm.default_mclk = rdev->clock.default_mclk; rdev->pm.current_sclk = rdev->clock.default_sclk; diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index eafd8160a155..f7e367815964 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -27,6 +27,26 @@ TRACE_EVENT(radeon_bo_create, TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages) ); +TRACE_EVENT(radeon_cs, + TP_PROTO(struct radeon_cs_parser *p), + TP_ARGS(p), + TP_STRUCT__entry( + __field(u32, ring) + __field(u32, dw) + __field(u32, fences) + ), + + TP_fast_assign( + __entry->ring = p->ring; + __entry->dw = p->chunks[p->chunk_ib_idx].length_dw; + __entry->fences = radeon_fence_count_emitted( + p->rdev, p->ring); + ), + TP_printk("ring=%u, dw=%u, fences=%u", + __entry->ring, __entry->dw, + __entry->fences) +); + DECLARE_EVENT_CLASS(radeon_fence_request, TP_PROTO(struct drm_device *dev, u32 seqno), @@ -53,13 +73,6 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_emit, TP_ARGS(dev, seqno) ); -DEFINE_EVENT(radeon_fence_request, radeon_fence_retire, - - TP_PROTO(struct drm_device *dev, u32 seqno), - - TP_ARGS(dev, seqno) -); - DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_begin, TP_PROTO(struct drm_device *dev, u32 seqno), diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b8074a8ec75a..9566b5940a5a 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -274,19 +274,26 @@ static void 
rs400_mc_init(struct radeon_device *rdev) uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t r; + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(RS480_NB_MC_INDEX, reg & 0xff); r = RREG32(RS480_NB_MC_DATA); WREG32(RS480_NB_MC_INDEX, 0xff); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); return r; } void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(RS480_NB_MC_INDEX, ((reg) & 0xff) | RS480_NB_MC_IND_WR_EN); WREG32(RS480_NB_MC_DATA, (v)); WREG32(RS480_NB_MC_INDEX, 0xff); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 670b555d2ca2..6acba8017b9a 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -847,16 +847,26 @@ void rs600_bandwidth_update(struct radeon_device *rdev) uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_000070_MC_IND_INDEX, S_000070_MC_IND_ADDR(reg) | S_000070_MC_IND_CITF_ARB0(1)); - return RREG32(R_000074_MC_IND_DATA); + r = RREG32(R_000074_MC_IND_DATA); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); + return r; } void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_000070_MC_IND_INDEX, S_000070_MC_IND_ADDR(reg) | S_000070_MC_IND_CITF_ARB0(1) | S_000070_MC_IND_WR_EN(1)); WREG32(R_000074_MC_IND_DATA, v); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } static void rs600_debugfs(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index d8ddfb34545d..1447d794c22a 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -631,20 +631,27 @@ void 
rs690_bandwidth_update(struct radeon_device *rdev) uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t r; + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_000078_MC_INDEX, S_000078_MC_IND_ADDR(reg)); r = RREG32(R_00007C_MC_DATA); WREG32(R_000078_MC_INDEX, ~C_000078_MC_IND_ADDR); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); return r; } void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(R_000078_MC_INDEX, S_000078_MC_IND_ADDR(reg) | S_000078_MC_IND_WR_EN(1)); WREG32(R_00007C_MC_DATA, v); WREG32(R_000078_MC_INDEX, 0x7F); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } static void rs690_mc_program(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c index d1a1ce73bd45..6af8505cf4d2 100644 --- a/drivers/gpu/drm/radeon/rs780_dpm.c +++ b/drivers/gpu/drm/radeon/rs780_dpm.c @@ -62,9 +62,7 @@ static void rs780_get_pm_mode_parameters(struct radeon_device *rdev) radeon_crtc = to_radeon_crtc(crtc); pi->crtc_id = radeon_crtc->crtc_id; if (crtc->mode.htotal && crtc->mode.vtotal) - pi->refresh_rate = - (crtc->mode.clock * 1000) / - (crtc->mode.htotal * crtc->mode.vtotal); + pi->refresh_rate = drm_mode_vrefresh(&crtc->mode); break; } } @@ -376,9 +374,8 @@ static void rs780_disable_vbios_powersaving(struct radeon_device *rdev) WREG32_P(CG_INTGFX_MISC, 0, ~0xFFF00000); } -static void rs780_force_voltage_to_high(struct radeon_device *rdev) +static void rs780_force_voltage(struct radeon_device *rdev, u16 voltage) { - struct igp_power_info *pi = rs780_get_pi(rdev); struct igp_ps *current_state = rs780_get_ps(rdev->pm.dpm.current_ps); if ((current_state->max_voltage == RS780_VDDC_LEVEL_HIGH) && @@ -390,7 +387,7 @@ static void rs780_force_voltage_to_high(struct radeon_device *rdev) udelay(1); WREG32_P(FVTHROT_PWM_CTRL_REG0, - 
STARTING_PWM_HIGHTIME(pi->max_voltage), + STARTING_PWM_HIGHTIME(voltage), ~STARTING_PWM_HIGHTIME_MASK); WREG32_P(FVTHROT_PWM_CTRL_REG0, @@ -404,6 +401,26 @@ static void rs780_force_voltage_to_high(struct radeon_device *rdev) WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); } +static void rs780_force_fbdiv(struct radeon_device *rdev, u32 fb_div) +{ + struct igp_ps *current_state = rs780_get_ps(rdev->pm.dpm.current_ps); + + if (current_state->sclk_low == current_state->sclk_high) + return; + + WREG32_P(GFX_MACRO_BYPASS_CNTL, SPLL_BYPASS_CNTL, ~SPLL_BYPASS_CNTL); + + WREG32_P(FVTHROT_FBDIV_REG2, FORCED_FEEDBACK_DIV(fb_div), + ~FORCED_FEEDBACK_DIV_MASK); + WREG32_P(FVTHROT_FBDIV_REG1, STARTING_FEEDBACK_DIV(fb_div), + ~STARTING_FEEDBACK_DIV_MASK); + WREG32_P(FVTHROT_FBDIV_REG1, FORCE_FEEDBACK_DIV, ~FORCE_FEEDBACK_DIV); + + udelay(100); + + WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); +} + static int rs780_set_engine_clock_scaling(struct radeon_device *rdev, struct radeon_ps *new_ps, struct radeon_ps *old_ps) @@ -432,17 +449,13 @@ static int rs780_set_engine_clock_scaling(struct radeon_device *rdev, if (ret) return ret; - WREG32_P(GFX_MACRO_BYPASS_CNTL, SPLL_BYPASS_CNTL, ~SPLL_BYPASS_CNTL); - - WREG32_P(FVTHROT_FBDIV_REG2, FORCED_FEEDBACK_DIV(max_dividers.fb_div), - ~FORCED_FEEDBACK_DIV_MASK); - WREG32_P(FVTHROT_FBDIV_REG1, STARTING_FEEDBACK_DIV(max_dividers.fb_div), - ~STARTING_FEEDBACK_DIV_MASK); - WREG32_P(FVTHROT_FBDIV_REG1, FORCE_FEEDBACK_DIV, ~FORCE_FEEDBACK_DIV); - - udelay(100); + if ((min_dividers.ref_div != max_dividers.ref_div) || + (min_dividers.post_div != max_dividers.post_div) || + (max_dividers.ref_div != current_max_dividers.ref_div) || + (max_dividers.post_div != current_max_dividers.post_div)) + return -EINVAL; - WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~SPLL_BYPASS_CNTL); + rs780_force_fbdiv(rdev, max_dividers.fb_div); if (max_dividers.fb_div > min_dividers.fb_div) { WREG32_P(FVTHROT_FBDIV_REG0, @@ -486,6 +499,9 @@ static void 
rs780_activate_engine_clk_scaling(struct radeon_device *rdev, (new_state->sclk_low == old_state->sclk_low)) return; + if (new_state->sclk_high == new_state->sclk_low) + return; + rs780_clk_scaling_enable(rdev, true); } @@ -649,7 +665,7 @@ int rs780_dpm_set_power_state(struct radeon_device *rdev) rs780_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); if (pi->voltage_control) { - rs780_force_voltage_to_high(rdev); + rs780_force_voltage(rdev, pi->max_voltage); mdelay(5); } @@ -717,14 +733,18 @@ static void rs780_parse_pplib_non_clock_info(struct radeon_device *rdev, if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); - } else if (r600_is_uvd_state(rps->class, rps->class2)) { - rps->vclk = RS780_DEFAULT_VCLK_FREQ; - rps->dclk = RS780_DEFAULT_DCLK_FREQ; } else { rps->vclk = 0; rps->dclk = 0; } + if (r600_is_uvd_state(rps->class, rps->class2)) { + if ((rps->vclk == 0) || (rps->dclk == 0)) { + rps->vclk = RS780_DEFAULT_VCLK_FREQ; + rps->dclk = RS780_DEFAULT_DCLK_FREQ; + } + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) rdev->pm.dpm.boot_ps = rps; if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) @@ -986,3 +1006,55 @@ void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rde seq_printf(m, "power level 1 sclk: %u vddc_index: %d\n", ps->sclk_high, ps->max_voltage); } + +int rs780_dpm_force_performance_level(struct radeon_device *rdev, + enum radeon_dpm_forced_level level) +{ + struct igp_power_info *pi = rs780_get_pi(rdev); + struct radeon_ps *rps = rdev->pm.dpm.current_ps; + struct igp_ps *ps = rs780_get_ps(rps); + struct atom_clock_dividers dividers; + int ret; + + rs780_clk_scaling_enable(rdev, false); + rs780_voltage_scaling_enable(rdev, false); + + if (level == RADEON_DPM_FORCED_LEVEL_HIGH) { + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->max_voltage); + + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, 
+ ps->sclk_high, false, &dividers); + if (ret) + return ret; + + rs780_force_fbdiv(rdev, dividers.fb_div); + } else if (level == RADEON_DPM_FORCED_LEVEL_LOW) { + ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + ps->sclk_low, false, &dividers); + if (ret) + return ret; + + rs780_force_fbdiv(rdev, dividers.fb_div); + + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->min_voltage); + } else { + if (pi->voltage_control) + rs780_force_voltage(rdev, pi->max_voltage); + + if (ps->sclk_high != ps->sclk_low) { + WREG32_P(FVTHROT_FBDIV_REG1, 0, ~FORCE_FEEDBACK_DIV); + rs780_clk_scaling_enable(rdev, true); + } + + if (pi->voltage_control) { + rs780_voltage_scaling_enable(rdev, true); + rs780_enable_voltage_scaling(rdev, rps); + } + } + + rdev->pm.dpm.forced_level = level; + + return 0; +} diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 8ea1573ae820..873eb4b193b4 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -209,19 +209,27 @@ static void rv515_mc_init(struct radeon_device *rdev) uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg) { + unsigned long flags; uint32_t r; + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(MC_IND_INDEX, 0x7f0000 | (reg & 0xffff)); r = RREG32(MC_IND_DATA); WREG32(MC_IND_INDEX, 0); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); + return r; } void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { + unsigned long flags; + + spin_lock_irqsave(&rdev->mc_idx_lock, flags); WREG32(MC_IND_INDEX, 0xff0000 | ((reg) & 0xffff)); WREG32(MC_IND_DATA, (v)); WREG32(MC_IND_INDEX, 0); + spin_unlock_irqrestore(&rdev->mc_idx_lock, flags); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index ab1f2016f21e..5811d277a36a 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1758,8 +1758,6 @@ int rv6xx_dpm_set_power_state(struct
radeon_device *rdev) rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; - return 0; } diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index 8cbb85dae5aa..913b025ae9b3 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2064,12 +2064,6 @@ int rv770_dpm_set_power_state(struct radeon_device *rdev) rv770_program_dcodt_after_state_switch(rdev, new_ps, old_ps); rv770_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - ret = rv770_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); - if (ret) { - DRM_ERROR("rv770_dpm_force_performance_level failed\n"); - return ret; - } - return 0; } @@ -2147,14 +2141,18 @@ static void rv7xx_parse_pplib_non_clock_info(struct radeon_device *rdev, if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) { rps->vclk = le32_to_cpu(non_clock_info->ulVCLK); rps->dclk = le32_to_cpu(non_clock_info->ulDCLK); - } else if (r600_is_uvd_state(rps->class, rps->class2)) { - rps->vclk = RV770_DEFAULT_VCLK_FREQ; - rps->dclk = RV770_DEFAULT_DCLK_FREQ; } else { rps->vclk = 0; rps->dclk = 0; } + if (r600_is_uvd_state(rps->class, rps->class2)) { + if ((rps->vclk == 0) || (rps->dclk == 0)) { + rps->vclk = RV770_DEFAULT_VCLK_FREQ; + rps->dclk = RV770_DEFAULT_DCLK_FREQ; + } + } + if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) rdev->pm.dpm.boot_ps = rps; if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) diff --git a/drivers/gpu/drm/radeon/rv770_smc.c b/drivers/gpu/drm/radeon/rv770_smc.c index ab95da570215..b2a224407365 100644 --- a/drivers/gpu/drm/radeon/rv770_smc.c +++ b/drivers/gpu/drm/radeon/rv770_smc.c @@ -274,8 +274,8 @@ static const u8 cayman_smc_int_vectors[] = 0x08, 0x72, 0x08, 0x72 }; -int rv770_set_smc_sram_address(struct radeon_device *rdev, - u16 smc_address, u16 limit) +static int rv770_set_smc_sram_address(struct radeon_device *rdev, + u16 smc_address, u16 limit) { u32 addr; @@ -296,9 
+296,10 @@ int rv770_copy_bytes_to_smc(struct radeon_device *rdev, u16 smc_start_address, const u8 *src, u16 byte_count, u16 limit) { + unsigned long flags; u32 data, original_data, extra_shift; u16 addr; - int ret; + int ret = 0; if (smc_start_address & 3) return -EINVAL; @@ -307,13 +308,14 @@ int rv770_copy_bytes_to_smc(struct radeon_device *rdev, addr = smc_start_address; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); while (byte_count >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; ret = rv770_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_SRAM_DATA, data); @@ -328,7 +330,7 @@ int rv770_copy_bytes_to_smc(struct radeon_device *rdev, ret = rv770_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; original_data = RREG32(SMC_SRAM_DATA); @@ -346,12 +348,15 @@ int rv770_copy_bytes_to_smc(struct radeon_device *rdev, ret = rv770_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_SRAM_DATA, data); } - return 0; +done: + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); + + return ret; } static int rv770_program_interrupt_vectors(struct radeon_device *rdev, @@ -461,12 +466,15 @@ PPSMC_Result rv770_wait_for_smc_inactive(struct radeon_device *rdev) static void rv770_clear_smc_sram(struct radeon_device *rdev, u16 limit) { + unsigned long flags; u16 i; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); for (i = 0; i < limit; i += 4) { rv770_set_smc_sram_address(rdev, i, limit); WREG32(SMC_SRAM_DATA, 0); } + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); } int rv770_load_smc_ucode(struct radeon_device *rdev, @@ -595,27 +603,29 @@ int rv770_load_smc_ucode(struct radeon_device *rdev, int rv770_read_smc_sram_dword(struct radeon_device *rdev, u16 smc_address, u32 *value, u16 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = rv770_set_smc_sram_address(rdev, smc_address, 
limit); - if (ret) - return ret; - - *value = RREG32(SMC_SRAM_DATA); + if (ret == 0) + *value = RREG32(SMC_SRAM_DATA); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - return 0; + return ret; } int rv770_write_smc_sram_dword(struct radeon_device *rdev, u16 smc_address, u32 value, u16 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = rv770_set_smc_sram_address(rdev, smc_address, limit); - if (ret) - return ret; + if (ret == 0) + WREG32(SMC_SRAM_DATA, value); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - WREG32(SMC_SRAM_DATA, value); - - return 0; + return ret; } diff --git a/drivers/gpu/drm/radeon/rv770_smc.h b/drivers/gpu/drm/radeon/rv770_smc.h index f78d92a4b325..3b2c963c4880 100644 --- a/drivers/gpu/drm/radeon/rv770_smc.h +++ b/drivers/gpu/drm/radeon/rv770_smc.h @@ -187,8 +187,6 @@ typedef struct RV770_SMC_STATETABLE RV770_SMC_STATETABLE; #define RV770_SMC_SOFT_REGISTER_uvd_enabled 0x9C #define RV770_SMC_SOFT_REGISTER_is_asic_lombok 0xA0 -int rv770_set_smc_sram_address(struct radeon_device *rdev, - u16 smc_address, u16 limit); int rv770_copy_bytes_to_smc(struct radeon_device *rdev, u16 smc_start_address, const u8 *src, u16 byte_count, u16 limit); diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index 9fe60e542922..1ae277152cc7 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -852,7 +852,7 @@ #define AFMT_VBI_PACKET_CONTROL 0x7608 # define AFMT_GENERIC0_UPDATE (1 << 2) #define AFMT_INFOFRAME_CONTROL0 0x760c -# define AFMT_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hmdi regs */ +# define AFMT_AUDIO_INFO_SOURCE (1 << 6) /* 0 - sound block; 1 - hdmi regs */ # define AFMT_AUDIO_INFO_UPDATE (1 << 7) # define AFMT_MPEG_INFO_UPDATE (1 << 10) #define AFMT_GENERIC0_7 0x7610 diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 3e23b757dcfa..c354c1094967 100644 --- a/drivers/gpu/drm/radeon/si.c +++ 
b/drivers/gpu/drm/radeon/si.c @@ -83,6 +83,8 @@ extern void si_dma_vm_set_page(struct radeon_device *rdev, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); +static void si_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable); static const u32 verde_rlc_save_restore_register_list[] = { @@ -3386,6 +3388,8 @@ static int si_cp_resume(struct radeon_device *rdev) u32 rb_bufsz; int r; + si_enable_gui_idle_interrupt(rdev, false); + WREG32(CP_SEM_WAIT_TIMER, 0x0); WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0); @@ -3501,6 +3505,8 @@ static int si_cp_resume(struct radeon_device *rdev) rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; } + si_enable_gui_idle_interrupt(rdev, true); + return 0; } @@ -4888,7 +4894,7 @@ static void si_enable_gfx_cgpg(struct radeon_device *rdev, { u32 tmp; - if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) { + if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) { tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10); WREG32(RLC_TTOP_D, tmp); @@ -5250,6 +5256,7 @@ void si_update_cg(struct radeon_device *rdev, u32 block, bool enable) { if (block & RADEON_CG_BLOCK_GFX) { + si_enable_gui_idle_interrupt(rdev, false); /* order matters! 
*/ if (enable) { si_enable_mgcg(rdev, true); @@ -5258,6 +5265,7 @@ void si_update_cg(struct radeon_device *rdev, si_enable_cgcg(rdev, false); si_enable_mgcg(rdev, false); } + si_enable_gui_idle_interrupt(rdev, true); } if (block & RADEON_CG_BLOCK_MC) { @@ -5408,7 +5416,7 @@ static void si_init_pg(struct radeon_device *rdev) si_init_dma_pg(rdev); } si_init_ao_cu_mask(rdev); - if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) { + if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) { si_init_gfx_cgpg(rdev); } si_enable_dma_pg(rdev, true); @@ -5560,7 +5568,9 @@ static void si_disable_interrupt_state(struct radeon_device *rdev) { u32 tmp; - WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + tmp = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); WREG32(CP_INT_CNTL_RING1, 0); WREG32(CP_INT_CNTL_RING2, 0); tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE; @@ -5685,7 +5695,7 @@ static int si_irq_init(struct radeon_device *rdev) int si_irq_set(struct radeon_device *rdev) { - u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE; + u32 cp_int_cntl; u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0; @@ -5706,6 +5716,9 @@ int si_irq_set(struct radeon_device *rdev) return 0; } + cp_int_cntl = RREG32(CP_INT_CNTL_RING0) & + (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + if (!ASIC_IS_NODCE(rdev)) { hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN; hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN; diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 5be9b4e72350..cfe5d4d28915 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6075,12 +6075,6 @@ int si_dpm_set_power_state(struct radeon_device *rdev) return ret; } - ret = si_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO); 
- if (ret) { - DRM_ERROR("si_dpm_force_performance_level failed\n"); - return ret; - } - si_update_cg(rdev, (RADEON_CG_BLOCK_GFX | RADEON_CG_BLOCK_MC | RADEON_CG_BLOCK_SDMA | diff --git a/drivers/gpu/drm/radeon/si_smc.c b/drivers/gpu/drm/radeon/si_smc.c index 5f524c0a541e..d422a1cbf727 100644 --- a/drivers/gpu/drm/radeon/si_smc.c +++ b/drivers/gpu/drm/radeon/si_smc.c @@ -29,8 +29,8 @@ #include "ppsmc.h" #include "radeon_ucode.h" -int si_set_smc_sram_address(struct radeon_device *rdev, - u32 smc_address, u32 limit) +static int si_set_smc_sram_address(struct radeon_device *rdev, + u32 smc_address, u32 limit) { if (smc_address & 3) return -EINVAL; @@ -47,7 +47,8 @@ int si_copy_bytes_to_smc(struct radeon_device *rdev, u32 smc_start_address, const u8 *src, u32 byte_count, u32 limit) { - int ret; + unsigned long flags; + int ret = 0; u32 data, original_data, addr, extra_shift; if (smc_start_address & 3) @@ -57,13 +58,14 @@ int si_copy_bytes_to_smc(struct radeon_device *rdev, addr = smc_start_address; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); while (byte_count >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; ret = si_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_IND_DATA_0, data); @@ -78,7 +80,7 @@ int si_copy_bytes_to_smc(struct radeon_device *rdev, ret = si_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; original_data = RREG32(SMC_IND_DATA_0); @@ -96,11 +98,15 @@ int si_copy_bytes_to_smc(struct radeon_device *rdev, ret = si_set_smc_sram_address(rdev, addr, limit); if (ret) - return ret; + goto done; WREG32(SMC_IND_DATA_0, data); } - return 0; + +done: + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); + + return ret; } void si_start_smc(struct radeon_device *rdev) @@ -203,6 +209,7 @@ PPSMC_Result si_wait_for_smc_inactive(struct radeon_device *rdev) int si_load_smc_ucode(struct radeon_device *rdev, u32 limit) { + unsigned long flags; u32 
ucode_start_address; u32 ucode_size; const u8 *src; @@ -241,6 +248,7 @@ int si_load_smc_ucode(struct radeon_device *rdev, u32 limit) return -EINVAL; src = (const u8 *)rdev->smc_fw->data; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); WREG32(SMC_IND_INDEX_0, ucode_start_address); WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); while (ucode_size >= 4) { @@ -253,6 +261,7 @@ int si_load_smc_ucode(struct radeon_device *rdev, u32 limit) ucode_size -= 4; } WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); return 0; } @@ -260,25 +269,29 @@ int si_load_smc_ucode(struct radeon_device *rdev, u32 limit) int si_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, u32 *value, u32 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = si_set_smc_sram_address(rdev, smc_address, limit); - if (ret) - return ret; + if (ret == 0) + *value = RREG32(SMC_IND_DATA_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - *value = RREG32(SMC_IND_DATA_0); - return 0; + return ret; } int si_write_smc_sram_dword(struct radeon_device *rdev, u32 smc_address, u32 value, u32 limit) { + unsigned long flags; int ret; + spin_lock_irqsave(&rdev->smc_idx_lock, flags); ret = si_set_smc_sram_address(rdev, smc_address, limit); - if (ret) - return ret; + if (ret == 0) + WREG32(SMC_IND_DATA_0, value); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - WREG32(SMC_IND_DATA_0, value); - return 0; + return ret; } diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index 864761c0120e..96ea6db8bf57 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1319,8 +1319,6 @@ int sumo_dpm_set_power_state(struct radeon_device *rdev) if (pi->enable_dpm) sumo_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; - return 0; } diff --git 
a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index b07b7b8f1aff..7f998bf1cc9d 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1068,6 +1068,17 @@ static void trinity_update_requested_ps(struct radeon_device *rdev, pi->requested_rps.ps_priv = &pi->requested_ps; } +void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable) +{ + struct trinity_power_info *pi = trinity_get_pi(rdev); + + if (pi->enable_bapm) { + trinity_acquire_mutex(rdev); + trinity_dpm_bapm_enable(rdev, enable); + trinity_release_mutex(rdev); + } +} + int trinity_dpm_enable(struct radeon_device *rdev) { struct trinity_power_info *pi = trinity_get_pi(rdev); @@ -1091,6 +1102,7 @@ int trinity_dpm_enable(struct radeon_device *rdev) trinity_program_sclk_dpm(rdev); trinity_start_dpm(rdev); trinity_wait_for_dpm_enabled(rdev); + trinity_dpm_bapm_enable(rdev, false); trinity_release_mutex(rdev); if (rdev->irq.installed && @@ -1116,6 +1128,7 @@ void trinity_dpm_disable(struct radeon_device *rdev) trinity_release_mutex(rdev); return; } + trinity_dpm_bapm_enable(rdev, false); trinity_disable_clock_power_gating(rdev); sumo_clear_vc(rdev); trinity_wait_for_level_0(rdev); @@ -1212,6 +1225,8 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_acquire_mutex(rdev); if (pi->enable_dpm) { + if (pi->enable_bapm) + trinity_dpm_bapm_enable(rdev, rdev->pm.dpm.ac_power); trinity_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps); trinity_enable_power_level_0(rdev); trinity_force_level_0(rdev); @@ -1221,7 +1236,6 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_force_level_0(rdev); trinity_unforce_levels(rdev); trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); - rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO; } trinity_release_mutex(rdev); @@ -1854,6 +1868,7 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) 
pi->at[i] = TRINITY_AT_DFLT; + pi->enable_bapm = true; pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.h b/drivers/gpu/drm/radeon/trinity_dpm.h index e82df071f8b3..c261657750ca 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.h +++ b/drivers/gpu/drm/radeon/trinity_dpm.h @@ -108,6 +108,7 @@ struct trinity_power_info { bool enable_auto_thermal_throttling; bool enable_dpm; bool enable_sclk_ds; + bool enable_bapm; bool uvd_dpm; struct radeon_ps current_rps; struct trinity_ps current_ps; @@ -118,6 +119,7 @@ struct trinity_power_info { #define TRINITY_AT_DFLT 30 /* trinity_smc.c */ +int trinity_dpm_bapm_enable(struct radeon_device *rdev, bool enable); int trinity_dpm_config(struct radeon_device *rdev, bool enable); int trinity_uvd_dpm_config(struct radeon_device *rdev); int trinity_dpm_force_state(struct radeon_device *rdev, u32 n); diff --git a/drivers/gpu/drm/radeon/trinity_smc.c b/drivers/gpu/drm/radeon/trinity_smc.c index a42d89f1830c..9672bcbc7312 100644 --- a/drivers/gpu/drm/radeon/trinity_smc.c +++ b/drivers/gpu/drm/radeon/trinity_smc.c @@ -56,6 +56,14 @@ static int trinity_notify_message_to_smu(struct radeon_device *rdev, u32 id) return 0; } +int trinity_dpm_bapm_enable(struct radeon_device *rdev, bool enable) +{ + if (enable) + return trinity_notify_message_to_smu(rdev, PPSMC_MSG_EnableBAPM); + else + return trinity_notify_message_to_smu(rdev, PPSMC_MSG_DisableBAPM); +} + int trinity_dpm_config(struct radeon_device *rdev, bool enable) { if (enable) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 58a5f3261c0b..a868176c258a 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -218,7 +218,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint32_t key) { struct ttm_object_device *tdev = tfile->tdev; - struct ttm_base_object *base; + struct ttm_base_object 
*uninitialized_var(base); struct drm_hash_item *hash; int ret; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index bd2a3b40cd12..863bef9f9234 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -377,28 +377,26 @@ out: return nr_free; } -/* Get good estimation how many pages are free in pools */ -static int ttm_pool_get_num_unused_pages(void) -{ - unsigned i; - int total = 0; - for (i = 0; i < NUM_POOLS; ++i) - total += _manager->pools[i].npages; - - return total; -} - /** * Callback for mm to request pool to reduce number of page held. + * + * XXX: (dchinner) Deadlock warning! + * + * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means + * this can deadlock when called a sc->gfp_mask that is not equal to + * GFP_KERNEL. + * + * This code is crying out for a shrinker per pool.... */ -static int ttm_pool_mm_shrink(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned i; unsigned pool_offset = atomic_add_return(1, &start_pool); struct ttm_page_pool *pool; int shrink_pages = sc->nr_to_scan; + unsigned long freed = 0; pool_offset = pool_offset % NUM_POOLS; /* select start pool in round robin fashion */ @@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink, break; pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; shrink_pages = ttm_page_pool_free(pool, nr_free); + freed += nr_free - shrink_pages; } - /* return estimated number of unused pages in pool */ - return ttm_pool_get_num_unused_pages(); + return freed; +} + + +static unsigned long +ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned i; + unsigned long count = 0; + + for (i = 0; i < NUM_POOLS; ++i) + count += _manager->pools[i].npages; + + return count; } static void ttm_pool_mm_shrink_init(struct 
ttm_pool_manager *manager) { - manager->mm_shrink.shrink = &ttm_pool_mm_shrink; + manager->mm_shrink.count_objects = ttm_pool_shrink_count; + manager->mm_shrink.scan_objects = ttm_pool_shrink_scan; manager->mm_shrink.seeks = 1; register_shrinker(&manager->mm_shrink); } diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index b8b394319b45..7957beeeaf73 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev) } EXPORT_SYMBOL_GPL(ttm_dma_populate); -/* Get good estimation how many pages are free in pools */ -static int ttm_dma_pool_get_num_unused_pages(void) -{ - struct device_pools *p; - unsigned total = 0; - - mutex_lock(&_manager->lock); - list_for_each_entry(p, &_manager->pools, pools) - total += p->pool->npages_free; - mutex_unlock(&_manager->lock); - return total; -} - /* Put all pages in pages list to correct pool to wait for reuse */ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { @@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); /** * Callback for mm to request pool to reduce number of page held. + * + * XXX: (dchinner) Deadlock warning! + * + * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention + * needs to be paid to sc->gfp_mask to determine if this can be done or not. + * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context woul dbe really + * bad. 
+ * + * I'm getting sadder as I hear more pathetical whimpers about needing per-pool + * shrinkers */ -static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { static atomic_t start_pool = ATOMIC_INIT(0); unsigned idx = 0; unsigned pool_offset = atomic_add_return(1, &start_pool); unsigned shrink_pages = sc->nr_to_scan; struct device_pools *p; + unsigned long freed = 0; if (list_empty(&_manager->pools)) - return 0; + return SHRINK_STOP; mutex_lock(&_manager->lock); pool_offset = pool_offset % _manager->npools; @@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink, continue; nr_free = shrink_pages; shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free); + freed += nr_free - shrink_pages; + pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", p->pool->dev_name, p->pool->name, current->pid, nr_free, shrink_pages); } mutex_unlock(&_manager->lock); - /* return estimated number of unused pages in pool */ - return ttm_dma_pool_get_num_unused_pages(); + return freed; +} + +static unsigned long +ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct device_pools *p; + unsigned long count = 0; + + mutex_lock(&_manager->lock); + list_for_each_entry(p, &_manager->pools, pools) + count += p->pool->npages_free; + mutex_unlock(&_manager->lock); + return count; } static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager) { - manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink; + manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count; + manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan; manager->mm_shrink.seeks = 1; register_shrinker(&manager->mm_shrink); } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 5e93a52d4f2c..210d50365162 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -170,7 
+170,7 @@ void ttm_tt_destroy(struct ttm_tt *ttm) ttm_tt_unbind(ttm); } - if (likely(ttm->pages != NULL)) { + if (ttm->state == tt_unbound) { ttm->bdev->driver->ttm_tt_unpopulate(ttm); } diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 8dbe9d0ae9a7..8bf646183bac 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -97,7 +97,6 @@ int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page); switch (ret) { case -EAGAIN: - set_need_resched(); case 0: case -ERESTARTSYS: return VM_FAULT_NOPAGE; diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 3d7c9f67b6d7..71b70e3a7a71 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -773,7 +773,7 @@ config HID_ZYDACRON config HID_SENSOR_HUB tristate "HID Sensors framework support" - depends on HID && GENERIC_HARDIRQS + depends on HID select MFD_CORE default n ---help--- diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index ae88a97f976e..b8470b1a10fe 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -94,7 +94,6 @@ EXPORT_SYMBOL_GPL(hid_register_report); static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values) { struct hid_field *field; - int i; if (report->maxfield == HID_MAX_FIELDS) { hid_err(report->device, "too many fields in report\n"); @@ -113,9 +112,6 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned field->value = (s32 *)(field->usage + usages); field->report = report; - for (i = 0; i < usages; i++) - field->usage[i].usage_index = i; - return field; } @@ -226,9 +222,9 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign { struct hid_report *report; struct hid_field *field; - int usages; + unsigned usages; unsigned offset; - int i; + unsigned i; report = hid_register_report(parser->device, report_type, parser->global.report_id); if 
(!report) { @@ -255,7 +251,8 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign if (!parser->local.usage_index) /* Ignore padding fields */ return 0; - usages = max_t(int, parser->local.usage_index, parser->global.report_count); + usages = max_t(unsigned, parser->local.usage_index, + parser->global.report_count); field = hid_register_field(report, usages, parser->global.report_count); if (!field) @@ -266,13 +263,14 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign field->application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION); for (i = 0; i < usages; i++) { - int j = i; + unsigned j = i; /* Duplicate the last usage we parsed if we have excess values */ if (i >= parser->local.usage_index) j = parser->local.usage_index - 1; field->usage[i].hid = parser->local.usage[j]; field->usage[i].collection_index = parser->local.collection_index[j]; + field->usage[i].usage_index = i; } field->maxusage = usages; @@ -801,6 +799,64 @@ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size) } EXPORT_SYMBOL_GPL(hid_parse_report); +static const char * const hid_report_names[] = { + "HID_INPUT_REPORT", + "HID_OUTPUT_REPORT", + "HID_FEATURE_REPORT", +}; +/** + * hid_validate_values - validate existing device report's value indexes + * + * @device: hid device + * @type: which report type to examine + * @id: which report ID to examine (0 for first) + * @field_index: which report field to examine + * @report_counts: expected number of values + * + * Validate the number of values in a given field of a given report, after + * parsing. 
+ */ +struct hid_report *hid_validate_values(struct hid_device *hid, + unsigned int type, unsigned int id, + unsigned int field_index, + unsigned int report_counts) +{ + struct hid_report *report; + + if (type > HID_FEATURE_REPORT) { + hid_err(hid, "invalid HID report type %u\n", type); + return NULL; + } + + if (id >= HID_MAX_IDS) { + hid_err(hid, "invalid HID report id %u\n", id); + return NULL; + } + + /* + * Explicitly not using hid_get_report() here since it depends on + * ->numbered being checked, which may not always be the case when + * drivers go to access report values. + */ + report = hid->report_enum[type].report_id_hash[id]; + if (!report) { + hid_err(hid, "missing %s %u\n", hid_report_names[type], id); + return NULL; + } + if (report->maxfield <= field_index) { + hid_err(hid, "not enough fields in %s %u\n", + hid_report_names[type], id); + return NULL; + } + if (report->field[field_index]->report_count < report_counts) { + hid_err(hid, "not enough values in %s %u field %u\n", + hid_report_names[type], id, field_index); + return NULL; + } + return report; +} +EXPORT_SYMBOL_GPL(hid_validate_values); + /** * hid_open_report - open a driver-specific device report * @@ -1296,7 +1352,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, goto out; } - if (hid->claimed != HID_CLAIMED_HIDRAW) { + if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) { for (a = 0; a < report->maxfield; a++) hid_input_field(hid, report->field[a], cdata, interrupt); hdrv = hid->driver; diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index b420f4a0fd28..8741d953dcc8 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -485,6 +485,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; + /* Ignore if report count is out of bounds. 
*/ + if (field->report_count < 1) + goto ignore; + /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { @@ -1236,7 +1240,11 @@ static void report_features(struct hid_device *hid) rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) - for (i = 0; i < rep->maxfield; i++) + for (i = 0; i < rep->maxfield; i++) { + /* Ignore if report count is out of bounds. */ + if (rep->field[i]->report_count < 1) + continue; + for (j = 0; j < rep->field[i]->maxusage; j++) { /* Verify if Battery Strength feature is available */ hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i]); @@ -1245,6 +1253,7 @@ static void report_features(struct hid_device *hid) drv->feature_mapping(hid, rep->field[i], rep->field[i]->usage + j); } + } } static struct hid_input *hidinput_allocate(struct hid_device *hid) diff --git a/drivers/hid/hid-lenovo-tpkbd.c b/drivers/hid/hid-lenovo-tpkbd.c index 07837f5a4eb8..31cf29a6ba17 100644 --- a/drivers/hid/hid-lenovo-tpkbd.c +++ b/drivers/hid/hid-lenovo-tpkbd.c @@ -339,7 +339,15 @@ static int tpkbd_probe_tp(struct hid_device *hdev) struct tpkbd_data_pointer *data_pointer; size_t name_sz = strlen(dev_name(dev)) + 16; char *name_mute, *name_micmute; - int ret; + int i, ret; + + /* Validate required reports. 
*/ + for (i = 0; i < 4; i++) { + if (!hid_validate_values(hdev, HID_FEATURE_REPORT, 4, i, 1)) + return -ENODEV; + } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 3, 0, 2)) + return -ENODEV; if (sysfs_create_group(&hdev->dev.kobj, &tpkbd_attr_group_pointer)) { @@ -406,22 +414,27 @@ static int tpkbd_probe(struct hid_device *hdev, ret = hid_parse(hdev); if (ret) { hid_err(hdev, "hid_parse failed\n"); - goto err_free; + goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hid_hw_start failed\n"); - goto err_free; + goto err; } uhdev = (struct usbhid_device *) hdev->driver_data; - if (uhdev->ifnum == 1) - return tpkbd_probe_tp(hdev); + if (uhdev->ifnum == 1) { + ret = tpkbd_probe_tp(hdev); + if (ret) + goto err_hid; + } return 0; -err_free: +err_hid: + hid_hw_stop(hdev); +err: return ret; } diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c index b3cd1507dda2..1a42eaa6ca02 100644 --- a/drivers/hid/hid-lg2ff.c +++ b/drivers/hid/hid-lg2ff.c @@ -64,26 +64,13 @@ int lg2ff_init(struct hid_device *hid) struct hid_report *report; struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = - &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; int error; - if (list_empty(report_list)) { - hid_err(hid, "no output report found\n"); + /* Check that the report looks ok */ + report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7); + if (!report) return -ENODEV; - } - - report = list_entry(report_list->next, struct hid_report, list); - - if (report->maxfield < 1) { - hid_err(hid, "output report is empty\n"); - return -ENODEV; - } - if (report->field[0]->report_count < 7) { - hid_err(hid, "not enough values in the field\n"); - return -ENODEV; - } lg2ff = kmalloc(sizeof(struct lg2ff_device), GFP_KERNEL); if (!lg2ff) diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c index e52f181f6aa1..8c2da183d3bc 100644 --- 
a/drivers/hid/hid-lg3ff.c +++ b/drivers/hid/hid-lg3ff.c @@ -66,10 +66,11 @@ static int hid_lg3ff_play(struct input_dev *dev, void *data, int x, y; /* - * Maxusage should always be 63 (maximum fields) - * likely a better way to ensure this data is clean + * Available values in the field should always be 63, but we only use up to + * 35. Instead, clear the entire area, however big it is. */ - memset(report->field[0]->value, 0, sizeof(__s32)*report->field[0]->maxusage); + memset(report->field[0]->value, 0, + sizeof(__s32) * report->field[0]->report_count); switch (effect->type) { case FF_CONSTANT: @@ -129,32 +130,14 @@ static const signed short ff3_joystick_ac[] = { int lg3ff_init(struct hid_device *hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; const signed short *ff_bits = ff3_joystick_ac; int error; int i; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - if (!report) { - hid_err(hid, "NULL output report\n"); - return -1; - } - - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35)) + return -ENODEV; /* Assume single fixed device G940 */ for (i = 0; ff_bits[i] >= 0; i++) diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 0ddae2a00d59..8782fe1aaa07 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -484,34 +484,16 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde int lg4ff_init(struct hid_device *hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct 
list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; struct lg4ff_device_entry *entry; struct lg_drv_data *drv_data; struct usb_device_descriptor *udesc; int error, i, j; __u16 bcdDevice, rev_maj, rev_min; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - if (!report) { - hid_err(hid, "NULL output report\n"); + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; - } - - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } /* Check what wheel has been connected */ for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) { diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c index d7ea8c845b40..e1394af0ae7b 100644 --- a/drivers/hid/hid-lgff.c +++ b/drivers/hid/hid-lgff.c @@ -128,27 +128,14 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude) int lgff_init(struct hid_device* hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; const signed short *ff_bits = ff_joystick; int error; int i; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) + return -ENODEV; for (i = 0; i < ARRAY_SIZE(devices); i++) { if (dev->id.vendor == devices[i].idVendor && diff --git 
a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 7800b1410562..2e5302462efb 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -461,7 +461,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, struct hid_report *report; struct hid_report_enum *output_report_enum; u8 *data = (u8 *)(&dj_report->device_index); - int i; + unsigned int i; output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT]; @@ -471,7 +471,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, return -ENODEV; } - for (i = 0; i < report->field[0]->report_count; i++) + for (i = 0; i < DJREPORT_SHORT_LENGTH - 1; i++) report->field[0]->value[i] = data[i]; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); @@ -791,6 +791,12 @@ static int logi_dj_probe(struct hid_device *hdev, goto hid_parse_fail; } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, REPORT_ID_DJ_SHORT, + 0, DJREPORT_SHORT_LENGTH - 1)) { + retval = -ENODEV; + goto hid_parse_fail; + } + /* Starts the usb device and connects to upper interfaces hiddev and * hidraw */ retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index ac28f08c3866..5e5fe1b8eebb 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -101,9 +101,9 @@ struct mt_device { unsigned last_slot_field; /* the last field of a slot */ unsigned mt_report_id; /* the report ID of the multitouch device */ unsigned pen_report_id; /* the report ID of the pen device */ - __s8 inputmode; /* InputMode HID feature, -1 if non-existent */ - __s8 inputmode_index; /* InputMode HID feature index in the report */ - __s8 maxcontact_report_id; /* Maximum Contact Number HID feature, + __s16 inputmode; /* InputMode HID feature, -1 if non-existent */ + __s16 inputmode_index; /* InputMode HID feature index in the report */ + __s16 
maxcontact_report_id; /* Maximum Contact Number HID feature, -1 if non-existent */ __u8 num_received; /* how many contacts we received */ __u8 num_expected; /* expected last contact index */ @@ -312,20 +312,18 @@ static void mt_feature_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct mt_device *td = hid_get_drvdata(hdev); - int i; switch (usage->hid) { case HID_DG_INPUTMODE: - td->inputmode = field->report->id; - td->inputmode_index = 0; /* has to be updated below */ - - for (i=0; i < field->maxusage; i++) { - if (field->usage[i].hid == usage->hid) { - td->inputmode_index = i; - break; - } + /* Ignore if value index is out of bounds. */ + if (usage->usage_index >= field->report_count) { + dev_err(&hdev->dev, "HID_DG_INPUTMODE out of range\n"); + break; } + td->inputmode = field->report->id; + td->inputmode_index = usage->usage_index; + break; case HID_DG_CONTACTMAX: td->maxcontact_report_id = field->report->id; @@ -511,6 +509,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, mt_store_field(usage, td, hi); return 1; case HID_DG_CONTACTCOUNT: + /* Ignore if indexes are out of bounds. */ + if (field->index >= field->report->maxfield || + usage->usage_index >= field->report_count) + return 1; td->cc_index = field->index; td->cc_value_index = usage->usage_index; return 1; diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 30dbb6b40bbf..b18320db5f7d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -537,6 +537,10 @@ static int buzz_init(struct hid_device *hdev) drv_data = hid_get_drvdata(hdev); BUG_ON(!(drv_data->quirks & BUZZ_CONTROLLER)); + /* Validate expected report characteristics. 
*/ + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 7)) + return -ENODEV; + buzz = kzalloc(sizeof(*buzz), GFP_KERNEL); if (!buzz) { hid_err(hdev, "Insufficient memory, cannot allocate driver data\n"); diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c index d16491192112..29f328f411fb 100644 --- a/drivers/hid/hid-steelseries.c +++ b/drivers/hid/hid-steelseries.c @@ -249,6 +249,11 @@ static int steelseries_srws1_probe(struct hid_device *hdev, goto err_free; } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 16)) { + ret = -ENODEV; + goto err_free; + } + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c index 6ec28a37c146..a29756c6ca02 100644 --- a/drivers/hid/hid-zpff.c +++ b/drivers/hid/hid-zpff.c @@ -68,21 +68,13 @@ static int zpff_init(struct hid_device *hid) struct hid_report *report; struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = - &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - int error; + int i, error; - if (list_empty(report_list)) { - hid_err(hid, "no output report found\n"); - return -ENODEV; - } - - report = list_entry(report_list->next, struct hid_report, list); - - if (report->maxfield < 4) { - hid_err(hid, "not enough fields in report\n"); - return -ENODEV; + for (i = 0; i < 4; i++) { + report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1); + if (!report) + return -ENODEV; } zpff = kzalloc(sizeof(struct zpff_device), GFP_KERNEL); diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index 4fe49d2bfe1d..eea817296513 100644 --- a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -364,7 +364,7 @@ static ssize_t set_pwm1_enable( if (config < 0) { dev_err(&client->dev, "Error reading configuration register, aborting.\n"); - return -EIO; + return config; } switch (val) { @@ 
-416,11 +416,9 @@ static ssize_t get_temp_auto_point_temp( case 1: return sprintf(buf, "%d\n", data->temp1_auto_point_temp[ix] * 1000); - break; case 2: return sprintf(buf, "%d\n", data->temp2_auto_point_temp[ix] * 1000); - break; default: dev_dbg(dev, "Unknown attr->nr (%d).\n", nr); return -EINVAL; @@ -513,7 +511,6 @@ static ssize_t set_temp_auto_point_temp( count = -EIO; } goto EXIT; - break; case 1: ptemp[1] = clamp_val(val / 1000, (ptemp[0] & 0x7C) + 4, 124); ptemp[1] &= 0x7C; @@ -665,7 +662,7 @@ static ssize_t set_fan1_div( if (config < 0) { dev_err(&client->dev, "Error reading configuration register, aborting.\n"); - return -EIO; + return config; } mutex_lock(&data->update_lock); switch (val) { diff --git a/drivers/hwmon/emc2103.c b/drivers/hwmon/emc2103.c index b07305622087..2c137b26acb4 100644 --- a/drivers/hwmon/emc2103.c +++ b/drivers/hwmon/emc2103.c @@ -248,7 +248,7 @@ static ssize_t set_temp_min(struct device *dev, struct device_attribute *da, int result = kstrtol(buf, 10, &val); if (result < 0) - return -EINVAL; + return result; val = DIV_ROUND_CLOSEST(val, 1000); if ((val < -63) || (val > 127)) @@ -272,7 +272,7 @@ static ssize_t set_temp_max(struct device *dev, struct device_attribute *da, int result = kstrtol(buf, 10, &val); if (result < 0) - return -EINVAL; + return result; val = DIV_ROUND_CLOSEST(val, 1000); if ((val < -63) || (val > 127)) @@ -320,7 +320,7 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *da, int status = kstrtol(buf, 10, &new_div); if (status < 0) - return -EINVAL; + return status; if (new_div == old_div) /* No change */ return count; @@ -394,7 +394,7 @@ static ssize_t set_fan_target(struct device *dev, struct device_attribute *da, int result = kstrtol(buf, 10, &rpm_target); if (result < 0) - return -EINVAL; + return result; /* Datasheet states 16384 as maximum RPM target (table 3.2) */ if ((rpm_target < 0) || (rpm_target > 16384)) @@ -440,7 +440,7 @@ static ssize_t set_pwm_enable(struct device *dev, 
struct device_attribute *da, int result = kstrtol(buf, 10, &new_value); if (result < 0) - return -EINVAL; + return result; mutex_lock(&data->update_lock); switch (new_value) { diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index e2b56a2b756c..632f1dc0fe1f 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -292,7 +292,7 @@ static int aem_init_ipmi_data(struct aem_ipmi_data *data, int iface, dev_err(bmc, "Unable to register user with IPMI interface %d\n", data->interface); - return -EACCES; + return err; } return 0; diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index e633856370cf..d65f3fd895dd 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -202,7 +202,6 @@ static void k10temp_remove(struct pci_dev *pdev) &sensor_dev_attr_temp1_crit.dev_attr); device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_crit_hyst.dev_attr); - pci_set_drvdata(pdev, NULL); } static DEFINE_PCI_DEVICE_TABLE(k10temp_id_table) = { diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index 964c1d688274..ae26b06fa819 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -210,7 +210,7 @@ static int tmp421_init_client(struct i2c_client *client) if (config < 0) { dev_err(&client->dev, "Could not read configuration register (%d)\n", config); - return -ENODEV; + return config; } config_orig = config; diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index e380c6eef3af..7b7ea320a258 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -75,7 +75,6 @@ config I2C_HELPER_AUTO config I2C_SMBUS tristate "SMBus-specific protocols" if !I2C_HELPER_AUTO - depends on GENERIC_HARDIRQS help Say Y here if you want support for SMBus extensions to the I2C specification. 
At the moment, the only supported extension is diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index fcdd321f709e..cdcbd8368ed3 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -115,7 +115,7 @@ config I2C_I801 config I2C_ISCH tristate "Intel SCH SMBus 1.0" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select LPC_SCH help Say Y here if you want to use SMBus controller on the Intel SCH @@ -546,7 +546,6 @@ config I2C_NUC900 config I2C_OCORES tristate "OpenCores I2C Controller" - depends on GENERIC_HARDIRQS help If you say yes to this option, support will be included for the OpenCores I2C controller. For details see @@ -791,7 +790,7 @@ config I2C_DIOLAN_U2C config I2C_PARPORT tristate "Parallel port adapter" - depends on PARPORT && GENERIC_HARDIRQS + depends on PARPORT select I2C_ALGOBIT select I2C_SMBUS help @@ -816,7 +815,6 @@ config I2C_PARPORT config I2C_PARPORT_LIGHT tristate "Parallel port adapter (light)" - depends on GENERIC_HARDIRQS select I2C_ALGOBIT select I2C_SMBUS help diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 57473415be10..132369fad4e0 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -662,7 +662,7 @@ static int davinci_i2c_probe(struct platform_device *pdev) #endif dev->dev = &pdev->dev; dev->irq = irq->start; - dev->pdata = dev_get_platdata(&dev->dev); + dev->pdata = dev_get_platdata(&pdev->dev); platform_set_drvdata(pdev, dev); if (!dev->pdata && pdev->dev.of_node) { diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig index cbea3271c1b1..90cf0cda50c4 100644 --- a/drivers/iio/Kconfig +++ b/drivers/iio/Kconfig @@ -4,7 +4,6 @@ menuconfig IIO tristate "Industrial I/O support" - depends on GENERIC_HARDIRQS help The industrial I/O subsystem provides a unified framework for drivers for many different types of embedded sensors using a diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig 
index d03ca4c1ff25..495be09781b1 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -8,7 +8,7 @@ config INFINIBAND_QIB config INFINIBAND_QIB_DCA bool "QIB DCA support" - depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m) + depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m) default y ---help--- Setting this enables DCA support on some Intel chip sets diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3f62041222f2..3591855cc5b5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1,7 +1,7 @@ /******************************************************************************* * This file contains iSCSI extentions for RDMA (iSER) Verbs * - * (c) Copyright 2013 RisingTide Systems LLC. + * (c) Copyright 2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -39,7 +39,17 @@ static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; -static struct kmem_cache *isert_cmd_cache; + +static void +isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -80,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) { struct isert_device *device = isert_conn->conn_device; struct ib_qp_init_attr attr; - struct ib_device_attr devattr; int ret, index, min_index = 0; - memset(&devattr, 0, sizeof(struct ib_device_attr)); - ret = 
isert_query_device(cma_id->device, &devattr); - if (ret) - return ret; - mutex_lock(&device_list_mutex); for (index = 0; index < device->cqs_used; index++) if (device->cq_active_qps[index] < @@ -108,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) * FIXME: Use devattr.max_sge - 2 for max_send_sge as * work-around for RDMA_READ.. */ - attr.cap.max_send_sge = devattr.max_sge - 2; + attr.cap.max_send_sge = device->dev_attr.max_sge - 2; isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; @@ -210,14 +214,31 @@ isert_create_device_ib_res(struct isert_device *device) { struct ib_device *ib_dev = device->ib_device; struct isert_cq_desc *cq_desc; + struct ib_device_attr *dev_attr; int ret = 0, i, j; + dev_attr = &device->dev_attr; + ret = isert_query_device(ib_dev, dev_attr); + if (ret) + return ret; + + /* asign function handlers */ + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + device->use_frwr = 1; + device->reg_rdma_mem = isert_reg_rdma_frwr; + device->unreg_rdma_mem = isert_unreg_rdma_frwr; + } else { + device->use_frwr = 0; + device->reg_rdma_mem = isert_map_rdma; + device->unreg_rdma_mem = isert_unmap_cmd; + } + device->cqs_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); - pr_debug("Using %d CQs, device %s supports %d vectors\n", + pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n", device->cqs_used, device->ib_device->name, - device->ib_device->num_comp_vectors); + device->ib_device->num_comp_vectors, device->use_frwr); device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device->cqs_used, GFP_KERNEL); if (!device->cq_desc) { @@ -363,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) return device; } +static void +isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc, *tmp; + int i = 0; + + if 
(list_empty(&isert_conn->conn_frwr_pool)) + return; + + pr_debug("Freeing conn %p frwr pool", isert_conn); + + list_for_each_entry_safe(fr_desc, tmp, + &isert_conn->conn_frwr_pool, list) { + list_del(&fr_desc->list); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + ib_dereg_mr(fr_desc->data_mr); + kfree(fr_desc); + ++i; + } + + if (i < isert_conn->conn_frwr_pool_size) + pr_warn("Pool still has %d regions registered\n", + isert_conn->conn_frwr_pool_size - i); +} + +static int +isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc; + struct isert_device *device = isert_conn->conn_device; + int i, ret; + + INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size = 0; + for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { + fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); + if (!fr_desc) { + pr_err("Failed to allocate fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + fr_desc->data_frpl = + ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_frpl)) { + pr_err("Failed to allocate fr_pg_list err=%ld\n", + PTR_ERR(fr_desc->data_frpl)); + ret = PTR_ERR(fr_desc->data_frpl); + goto err; + } + + fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_mr)) { + pr_err("Failed to allocate frmr err=%ld\n", + PTR_ERR(fr_desc->data_mr)); + ret = PTR_ERR(fr_desc->data_mr); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + goto err; + } + pr_debug("Create fr_desc %p page_list %p\n", + fr_desc, fr_desc->data_frpl->page_list); + + fr_desc->valid = true; + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size++; + } + + pr_debug("Creating conn %p frwr pool size=%d", + isert_conn, isert_conn->conn_frwr_pool_size); + + return 0; + +err: + isert_conn_free_frwr_pool(isert_conn); + return ret; +} + static int isert_connect_request(struct rdma_cm_id *cma_id, struct 
rdma_cm_event *event) { @@ -389,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); + spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -446,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->conn_pd = device->dev_pd; isert_conn->conn_mr = device->dev_mr; + if (device->use_frwr) { + ret = isert_conn_create_frwr_pool(isert_conn); + if (ret) { + pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); + goto out_frwr; + } + } + ret = isert_conn_setup_qp(isert_conn, cma_id); if (ret) goto out_conn_dev; @@ -459,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) return 0; out_conn_dev: + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); +out_frwr: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -482,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); + if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) isert_conn->conn_qp->recv_cq->cq_context)->cq_index; @@ -869,46 +984,37 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, size, rx_buflen, MAX_KEY_VALUE_PAIRS); memcpy(login->req_buf, &rx_desc->data[0], size); - complete(&isert_conn->conn_login_comp); -} - -static void -isert_release_cmd(struct iscsi_cmd *cmd) -{ - struct isert_cmd *isert_cmd = container_of(cmd, struct isert_cmd, - iscsi_cmd); - - pr_debug("Entering isert_release_cmd %p >>>>>>>>>>>>>>>.\n", isert_cmd); - - kfree(cmd->buf_ptr); - kfree(cmd->tmr_req); - - kmem_cache_free(isert_cmd_cache, isert_cmd); + if (login->first_request) { + 
complete(&isert_conn->conn_login_comp); + return; + } + schedule_delayed_work(&conn->login_work, 0); } static struct iscsi_cmd -*isert_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp) +*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp) { struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct isert_cmd *isert_cmd; + struct iscsi_cmd *cmd; - isert_cmd = kmem_cache_zalloc(isert_cmd_cache, gfp); - if (!isert_cmd) { - pr_err("Unable to allocate isert_cmd\n"); + cmd = iscsit_allocate_cmd(conn, gfp); + if (!cmd) { + pr_err("Unable to allocate iscsi_cmd + isert_cmd\n"); return NULL; } + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->conn = isert_conn; - isert_cmd->iscsi_cmd.release_cmd = &isert_release_cmd; + isert_cmd->iscsi_cmd = cmd; - return &isert_cmd->iscsi_cmd; + return cmd; } static int isert_handle_scsi_cmd(struct isert_conn *isert_conn, - struct isert_cmd *isert_cmd, struct iser_rx_desc *rx_desc, - unsigned char *buf) + struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd, + struct iser_rx_desc *rx_desc, unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf; struct scatterlist *sg; @@ -1015,9 +1121,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, static int isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, unsigned char *buf) + struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; int rc; @@ -1034,9 +1140,9 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, struct iscsi_text *hdr) + struct iscsi_cmd *cmd, struct iser_rx_desc 
*rx_desc, + struct iscsi_text *hdr) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; u32 payload_length = ntoh24(hdr->dlength); int rc; @@ -1081,26 +1187,26 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, switch (opcode) { case ISCSI_OP_SCSI_CMD: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->read_stag = read_stag; isert_cmd->read_va = read_va; isert_cmd->write_stag = write_stag; isert_cmd->write_va = write_va; - ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, + ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_NOOP_OUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_nop_out(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_nop_out(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_DATA_OUT: @@ -1108,7 +1214,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_TMFUNC: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1116,7 +1222,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_LOGOUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1127,12 +1233,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, HZ); break; case ISCSI_OP_TEXT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - 
isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_text_cmd(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd, rx_desc, (struct iscsi_text *)hdr); break; default: @@ -1243,26 +1349,65 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - pr_debug("isert_unmap_cmd >>>>>>>>>>>>>>>>>>>>>>>\n"); + pr_debug("isert_unmap_cmd: %p\n", isert_cmd); + if (wr->sge) { + pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + wr->sge = NULL; + } + + if (wr->send_wr) { + pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd); + kfree(wr->send_wr); + wr->send_wr = NULL; + } + + if (wr->ib_sge) { + pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd); + kfree(wr->ib_sge); + wr->ib_sge = NULL; + } +} + +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) +{ + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + LIST_HEAD(unmap_list); + + pr_debug("unreg_frwr_cmd: %p\n", isert_cmd); + + if (wr->fr_desc) { + pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n", + isert_cmd, wr->fr_desc); + spin_lock_bh(&isert_conn->conn_lock); + list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool); + spin_unlock_bh(&isert_conn->conn_lock); + wr->fr_desc = NULL; + } if (wr->sge) { - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); + pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); wr->sge = NULL; } - kfree(wr->send_wr); + wr->ib_sge = NULL; wr->send_wr = NULL; - - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; } static void isert_put_cmd(struct isert_cmd *isert_cmd) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; struct iscsi_conn *conn = isert_conn->conn; + struct isert_device *device = isert_conn->conn_device; pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); @@ -1276,7 +1421,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) if (cmd->data_direction == DMA_TO_DEVICE) iscsit_stop_dataout_timer(cmd); - isert_unmap_cmd(isert_cmd, isert_conn); + device->unreg_rdma_mem(isert_cmd, isert_conn); transport_generic_free_cmd(&cmd->se_cmd, 0); break; case ISCSI_OP_SCSI_TMFUNC: @@ -1311,7 +1456,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) * Fall-through */ default: - isert_release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } @@ -1347,27 +1492,16 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct se_cmd *se_cmd = &cmd->se_cmd; - struct ib_device *ib_dev = isert_cmd->conn->conn_cm_id->device; + struct isert_conn *isert_conn = isert_cmd->conn; + struct isert_device *device = isert_conn->conn_device; iscsit_stop_dataout_timer(cmd); + device->unreg_rdma_mem(isert_cmd, isert_conn); + cmd->write_data_done = wr->cur_rdma_length; - if (wr->sge) { - pr_debug("isert_do_rdma_read_comp: Unmapping wr->sge from t_data_sg\n"); - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); - wr->sge = NULL; - } - - if (isert_cmd->ib_sge) { - pr_debug("isert_do_rdma_read_comp: Freeing isert_cmd->ib_sge\n"); - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; - } - - cmd->write_data_done = se_cmd->data_length; - - 
pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n"); + pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; @@ -1383,7 +1517,7 @@ isert_do_control_comp(struct work_struct *work) struct isert_cmd, comp_work); struct isert_conn *isert_conn = isert_cmd->conn; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: @@ -1429,7 +1563,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn, struct ib_device *ib_dev) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1621,8 +1755,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) static int isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *) @@ -1671,8 +1804,7 @@ static int isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, bool nopout_response) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1691,8 +1823,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, static int isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct 
isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1710,8 +1841,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1729,8 +1859,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; @@ -1762,8 +1891,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_text_rsp *hdr = @@ -1805,7 +1933,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, struct ib_sge *ib_sge, struct ib_send_wr *send_wr, u32 data_left, u32 offset) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct scatterlist *sg_start, *tmp_sg; struct ib_device *ib_dev = 
isert_conn->conn_cm_id->device; u32 sg_off, page_off; @@ -1832,8 +1960,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, ib_sg_dma_len(ib_dev, tmp_sg) - page_off); ib_sge->lkey = isert_conn->conn_mr->lkey; - pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u\n", - ib_sge->addr, ib_sge->length); + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); page_off = 0; data_left -= ib_sge->length; ib_sge++; @@ -1847,200 +1975,373 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, } static int -isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_send_wr *wr_failed, *send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; struct ib_sge *ib_sge; - struct scatterlist *sg; - u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, sg_nents, i, ib_sge_cnt; - - pr_debug("RDMA_WRITE: data_length: %u\n", se_cmd->data_length); + struct scatterlist *sg_start; + u32 sg_off = 0, sg_nents; + u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0; + int ret = 0, count, i, ib_sge_cnt; + + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg = &se_cmd->t_data_sg[0]; - sg_nents = 
se_cmd->t_data_nents; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = se_cmd->t_data_nents - sg_off; - count = ib_dma_map_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map put_datain SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } - wr->sge = sg; + wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg: %p sg_nents: %u for RDMA_WRITE\n", - count, sg, sg_nents); + wr->cur_rdma_length = data_left; + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); if (!ib_sge) { - pr_warn("Unable to allocate datain ib_sge\n"); + pr_warn("Unable to allocate ib_sge\n"); ret = -ENOMEM; goto unmap_sg; } - isert_cmd->ib_sge = ib_sge; - - pr_debug("Allocated ib_sge: %p from t_data_ents: %d for RDMA_WRITE\n", - ib_sge, se_cmd->t_data_nents); + wr->ib_sge = ib_sge; wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { - pr_err("Unable to allocate wr->send_wr\n"); + pr_debug("Unable to allocate wr->send_wr\n"); ret = -ENOMEM; goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; wr->isert_cmd = isert_cmd; rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - data_left = se_cmd->data_length; for (i = 0; i < wr->send_wr_num; i++) { send_wr = &isert_cmd->rdma_wr.send_wr[i]; data_len = min(data_left, rdma_write_max); - send_wr->opcode = IB_WR_RDMA_WRITE; send_wr->send_flags = 0; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; 
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + if (i + 1 == wr->send_wr_num) + send_wr->next = &isert_cmd->tx_desc.send_wr; + else + send_wr->next = &wr->send_wr[i + 1]; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + if (i + 1 == wr->send_wr_num) + send_wr->send_flags = IB_SEND_SIGNALED; + else + send_wr->next = &wr->send_wr[i + 1]; + } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, send_wr, data_len, offset); ib_sge += ib_sge_cnt; - if (i + 1 == wr->send_wr_num) - send_wr->next = &isert_cmd->tx_desc.send_wr; - else - send_wr->next = &wr->send_wr[i + 1]; - offset += data_len; + va_offset += data_len; data_left -= data_len; } - /* - * Build isert_conn->tx_desc for iSCSI response PDU and attach - */ - isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) - &isert_cmd->tx_desc.iscsi_header); - isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - atomic_inc(&isert_conn->post_send_buf_count); + return 0; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); - if (rc) { - pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); +static int +isert_map_fr_pagelist(struct ib_device *ib_dev, + struct scatterlist *sg_start, int sg_nents, u64 *fr_pl) +{ + u64 start_addr, end_addr, page, chunk_start = 0; + struct scatterlist *tmp_sg; + int i = 0, new_chunk, last_ent, n_pages; + + n_pages = 0; + new_chunk = 1; + last_ent = sg_nents - 1; + for_each_sg(sg_start, tmp_sg, sg_nents, i) { + start_addr = ib_sg_dma_address(ib_dev, tmp_sg); + if (new_chunk) + chunk_start = start_addr; + end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); + + pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n", + i, (unsigned long long)tmp_sg->dma_address, + tmp_sg->length); + + if ((end_addr & ~PAGE_MASK) && i < last_ent) { + new_chunk = 0; + continue; + } + new_chunk = 1; + + page = chunk_start & PAGE_MASK; + do { + fr_pl[n_pages++] = page; + pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n", + n_pages - 1, page); + page += PAGE_SIZE; + } while (page < end_addr); } - pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n"); - return 1; -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + return n_pages; +} + +static int +isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, + struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, + struct ib_sge *ib_sge, u32 offset, unsigned int data_len) +{ + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct scatterlist *sg_start; + u32 sg_off, page_off; + struct ib_send_wr fr_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret, sg_nents, pagelist_len; + + sg_off = offset / PAGE_SIZE; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off, + ISCSI_ISER_SG_TABLESIZE); + page_off = 
offset % PAGE_SIZE; + + pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n", + isert_cmd, fr_desc, sg_nents, sg_off, offset); + + pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, + &fr_desc->data_frpl->page_list[0]); + + if (!fr_desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(fr_desc->data_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(fr_desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.opcode = IB_WR_FAST_REG_MR; + fr_wr.wr.fast_reg.iova_start = + fr_desc->data_frpl->page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl; + fr_wr.wr.fast_reg.page_list_len = pagelist_len; + fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fr_wr.wr.fast_reg.length = data_len; + fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey; + fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + + if (!wr) + wr = &fr_wr; + else + wr->next = &fr_wr; + + ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); + if (ret) { + pr_err("fast registration failed, ret:%d\n", ret); + return ret; + } + fr_desc->valid = false; + + ib_sge->lkey = fr_desc->data_mr->lkey; + ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off; + ib_sge->length = data_len; + + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); + return ret; } static int -isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - 
struct ib_send_wr *wr_failed, *send_wr; - struct ib_sge *ib_sge; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; + struct ib_sge *ib_sge; struct scatterlist *sg_start; - u32 sg_off, sg_nents, page_off, va_offset = 0; + struct fast_reg_descriptor *fr_desc; + u32 sg_off = 0, sg_nents; u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, i, ib_sge_cnt; + int ret = 0, count; + unsigned long flags; - pr_debug("RDMA_READ: data_length: %u write_data_done: %u\n", - se_cmd->data_length, cmd->write_data_done); + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg_off = cmd->write_data_done / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - page_off = cmd->write_data_done % PAGE_SIZE; - - pr_debug("RDMA_READ: sg_off: %d, sg_start: %p page_off: %d\n", - sg_off, sg_start, page_off); - - data_left = se_cmd->data_length - cmd->write_data_done; sg_nents = se_cmd->t_data_nents - sg_off; - pr_debug("RDMA_READ: data_left: %d, sg_nents: %d\n", - data_left, sg_nents); - - count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map get_dataout SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg_start: %p sg_nents: %u for RDMA_READ\n", - count, sg_start, sg_nents); + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); - ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); - if (!ib_sge) { - pr_warn("Unable to allocate dataout ib_sge\n"); - ret = -ENOMEM; - goto unmap_sg; + memset(&wr->s_ib_sge, 0, sizeof(*ib_sge)); + ib_sge = &wr->s_ib_sge; + wr->ib_sge = ib_sge; + + wr->send_wr_num = 1; + memset(&wr->s_send_wr, 0, sizeof(*send_wr)); + wr->send_wr = &wr->s_send_wr; + + wr->isert_cmd = isert_cmd; + rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE; + + send_wr = &isert_cmd->rdma_wr.s_send_wr; + send_wr->sg_list = ib_sge; + send_wr->num_sge = 1; + send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + send_wr->send_flags = 0; + send_wr->next = &isert_cmd->tx_desc.send_wr; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + send_wr->send_flags = IB_SEND_SIGNALED; } - isert_cmd->ib_sge = ib_sge; - pr_debug("Using ib_sge: %p from sg_ents: %d for RDMA_READ\n", - ib_sge, sg_nents); + data_len = min(data_left, rdma_write_max); + wr->cur_rdma_length = data_len; - wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); - wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, - GFP_KERNEL); - if (!wr->send_wr) { - pr_debug("Unable to allocate wr->send_wr\n"); - ret = -ENOMEM; + spin_lock_irqsave(&isert_conn->conn_lock, flags); + 
fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; + + ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, + ib_sge, offset, data_len); + if (ret) { + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - isert_cmd->tx_desc.isert_cmd = isert_cmd; + return 0; - wr->iser_ib_op = ISER_IB_RDMA_READ; - wr->isert_cmd = isert_cmd; - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - offset = cmd->write_data_done; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - for (i = 0; i < wr->send_wr_num; i++) { - send_wr = &isert_cmd->rdma_wr.send_wr[i]; - data_len = min(data_left, rdma_write_max); +static int +isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; + pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n", + isert_cmd, se_cmd->data_length); + wr->iser_ib_op = ISER_IB_RDMA_WRITE; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; + } - ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); - ib_sge += ib_sge_cnt; + /* + * Build isert_conn->tx_desc for iSCSI response PDU and attach + */ + 
isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); + iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) + &isert_cmd->tx_desc.iscsi_header); + isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - if (i + 1 == wr->send_wr_num) - send_wr->send_flags = IB_SEND_SIGNALED; - else - send_wr->next = &wr->send_wr[i + 1]; + atomic_inc(&isert_conn->post_send_buf_count); - offset += data_len; - va_offset += data_len; - data_left -= data_len; + rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); + if (rc) { + pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); + atomic_dec(&isert_conn->post_send_buf_count); + } + pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", + isert_cmd); + + return 1; +} + +static int +isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; + + pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", + isert_cmd, se_cmd->data_length, cmd->write_data_done); + wr->iser_ib_op = ISER_IB_RDMA_READ; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; } atomic_inc(&isert_conn->post_send_buf_count); @@ -2050,12 +2351,10 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); atomic_dec(&isert_conn->post_send_buf_count); } - pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n"); - return 0; + pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", + isert_cmd); -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, 
DMA_FROM_DEVICE); - return ret; + return 0; } static int @@ -2224,6 +2523,14 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) int ret; pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + /* + * For login requests after the first PDU, isert_rx_login_req() will + * kick schedule_delayed_work(&conn->login_work) as the packet is + * received, which turns this callback from iscsi_target_do_login_rx() + * into a NOP. + */ + if (!login->first_request) + return 0; ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) @@ -2393,12 +2700,12 @@ static void isert_free_conn(struct iscsi_conn *conn) static struct iscsit_transport iser_target_transport = { .name = "IB/iSER", .transport_type = ISCSI_INFINIBAND, + .priv_size = sizeof(struct isert_cmd), .owner = THIS_MODULE, .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, .iscsit_free_conn = isert_free_conn, - .iscsit_alloc_cmd = isert_alloc_cmd, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, .iscsit_immediate_queue = isert_immediate_queue, @@ -2425,21 +2732,10 @@ static int __init isert_init(void) goto destroy_rx_wq; } - isert_cmd_cache = kmem_cache_create("isert_cmd_cache", - sizeof(struct isert_cmd), __alignof__(struct isert_cmd), - 0, NULL); - if (!isert_cmd_cache) { - pr_err("Unable to create isert_cmd_cache\n"); - ret = -ENOMEM; - goto destroy_tx_cq; - } - iscsit_register_transport(&iser_target_transport); pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); return 0; -destroy_tx_cq: - destroy_workqueue(isert_comp_wq); destroy_rx_wq: destroy_workqueue(isert_rx_wq); return ret; @@ -2447,7 +2743,6 @@ destroy_rx_wq: static void __exit isert_exit(void) { - kmem_cache_destroy(isert_cmd_cache); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git 
a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 191117b5b508..631f2090f0b8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -5,6 +5,7 @@ #include <rdma/rdma_cm.h> #define ISERT_RDMA_LISTEN_BACKLOG 10 +#define ISCSI_ISER_SG_TABLESIZE 256 enum isert_desc_type { ISCSI_TX_CONTROL, @@ -45,15 +46,26 @@ struct iser_tx_desc { struct ib_send_wr send_wr; } __packed; +struct fast_reg_descriptor { + struct list_head list; + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + bool valid; +}; + struct isert_rdma_wr { struct list_head wr_list; struct isert_cmd *isert_cmd; enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; + struct ib_sge s_ib_sge; int num_sge; struct scatterlist *sge; int send_wr_num; struct ib_send_wr *send_wr; + struct ib_send_wr s_send_wr; + u32 cur_rdma_length; + struct fast_reg_descriptor *fr_desc; }; struct isert_cmd { @@ -67,8 +79,7 @@ struct isert_cmd { u32 write_va_off; u32 rdma_wr_num; struct isert_conn *conn; - struct iscsi_cmd iscsi_cmd; - struct ib_sge *ib_sge; + struct iscsi_cmd *iscsi_cmd; struct iser_tx_desc tx_desc; struct isert_rdma_wr rdma_wr; struct work_struct comp_work; @@ -106,6 +117,10 @@ struct isert_conn { wait_queue_head_t conn_wait; wait_queue_head_t conn_wait_comp_err; struct kref conn_kref; + struct list_head conn_frwr_pool; + int conn_frwr_pool_size; + /* lock to protect frwr_pool */ + spinlock_t conn_lock; }; #define ISERT_MAX_CQ 64 @@ -118,6 +133,7 @@ struct isert_cq_desc { }; struct isert_device { + int use_frwr; int cqs_used; int refcount; int cq_active_qps[ISERT_MAX_CQ]; @@ -128,6 +144,12 @@ struct isert_device { struct ib_cq *dev_tx_cq[ISERT_MAX_CQ]; struct isert_cq_desc *cq_desc; struct list_head dev_node; + struct ib_device_attr dev_attr; + int (*reg_rdma_mem)(struct iscsi_conn *conn, + struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); + void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd, + struct isert_conn 
*isert_conn); }; struct isert_np { diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index d2b34fbbc42e..b6ded17b3be3 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -48,6 +48,7 @@ struct evdev_client { struct evdev *evdev; struct list_head node; int clkid; + bool revoked; unsigned int bufsize; struct input_event buffer[]; }; @@ -164,6 +165,9 @@ static void evdev_pass_values(struct evdev_client *client, struct input_event event; bool wakeup = false; + if (client->revoked) + return; + event.time = ktime_to_timeval(client->clkid == CLOCK_MONOTONIC ? mono : real); @@ -240,7 +244,7 @@ static int evdev_flush(struct file *file, fl_owner_t id) if (retval) return retval; - if (!evdev->exist) + if (!evdev->exist || client->revoked) retval = -ENODEV; else retval = input_flush_device(&evdev->handle, file); @@ -429,7 +433,7 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer, if (retval) return retval; - if (!evdev->exist) { + if (!evdev->exist || client->revoked) { retval = -ENODEV; goto out; } @@ -482,7 +486,7 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, return -EINVAL; for (;;) { - if (!evdev->exist) + if (!evdev->exist || client->revoked) return -ENODEV; if (client->packet_head == client->tail && @@ -511,7 +515,7 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, if (!(file->f_flags & O_NONBLOCK)) { error = wait_event_interruptible(evdev->wait, client->packet_head != client->tail || - !evdev->exist); + !evdev->exist || client->revoked); if (error) return error; } @@ -529,7 +533,11 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait) poll_wait(file, &evdev->wait, wait); - mask = evdev->exist ? 
POLLOUT | POLLWRNORM : POLLHUP | POLLERR; + if (evdev->exist && !client->revoked) + mask = POLLOUT | POLLWRNORM; + else + mask = POLLHUP | POLLERR; + if (client->packet_head != client->tail) mask |= POLLIN | POLLRDNORM; @@ -795,6 +803,17 @@ static int evdev_handle_mt_request(struct input_dev *dev, return 0; } +static int evdev_revoke(struct evdev *evdev, struct evdev_client *client, + struct file *file) +{ + client->revoked = true; + evdev_ungrab(evdev, client); + input_flush_device(&evdev->handle, file); + wake_up_interruptible(&evdev->wait); + + return 0; +} + static long evdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { @@ -857,6 +876,12 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, else return evdev_ungrab(evdev, client); + case EVIOCREVOKE: + if (p) + return -EINVAL; + else + return evdev_revoke(evdev, client, file); + case EVIOCSCLOCKID: if (copy_from_user(&i, p, sizeof(unsigned int))) return -EFAULT; @@ -1002,7 +1027,7 @@ static long evdev_ioctl_handler(struct file *file, unsigned int cmd, if (retval) return retval; - if (!evdev->exist) { + if (!evdev->exist || client->revoked) { retval = -ENODEV; goto out; } diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 269d4c3658cb..c1edd39bc5ba 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -224,7 +224,7 @@ config KEYBOARD_TCA6416 config KEYBOARD_TCA8418 tristate "TCA8418 Keypad Support" - depends on I2C && GENERIC_HARDIRQS + depends on I2C select INPUT_MATRIXKMAP help This driver implements basic keypad functionality @@ -303,7 +303,7 @@ config KEYBOARD_HP7XX config KEYBOARD_LM8323 tristate "LM8323 keypad chip" - depends on I2C && GENERIC_HARDIRQS + depends on I2C depends on LEDS_CLASS help If you say yes here you get support for the National Semiconductor diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 1e691a3a79cb..33b3e88fe4a2 100644 --- 
a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -239,7 +239,6 @@ config SERIO_PS2MULT config SERIO_ARC_PS2 tristate "ARC PS/2 support" - depends on GENERIC_HARDIRQS help Say Y here if you have an ARC FPGA platform with a PS/2 controller in it. diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 3b9758b5f4d7..e09ec67957a3 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -389,7 +389,7 @@ config TOUCHSCREEN_MCS5000 config TOUCHSCREEN_MMS114 tristate "MELFAS MMS114 touchscreen" - depends on I2C && GENERIC_HARDIRQS + depends on I2C help Say Y here if you have the MELFAS MMS114 touchscreen controller chip in your system. @@ -845,7 +845,7 @@ config TOUCHSCREEN_TSC_SERIO config TOUCHSCREEN_TSC2005 tristate "TSC2005 based touchscreens" - depends on SPI_MASTER && GENERIC_HARDIRQS + depends on SPI_MASTER help Say Y here if you have a TSC2005 based touchscreen. diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 820d85c4a4a0..fe302e33f72e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -17,6 +17,16 @@ config OF_IOMMU def_bool y depends on OF +config FSL_PAMU + bool "Freescale IOMMU support" + depends on PPC_E500MC + select IOMMU_API + select GENERIC_ALLOCATOR + help + Freescale PAMU support. PAMU is the IOMMU present on Freescale QorIQ platforms. + PAMU can authorize memory access, remap the memory address, and remap I/O + transaction types. 
+ # MSM IOMMU support config MSM_IOMMU bool "MSM IOMMU Support" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index bbe7041212dd..14c1f474cf11 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o +obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6dc659426a51..72531f008a5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -456,8 +456,10 @@ static int iommu_init_device(struct device *dev) } ret = init_iommu_group(dev); - if (ret) + if (ret) { + free_dev_data(dev_data); return ret; + } if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 7acbf351e9af..8f798be6e398 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1384,7 +1384,7 @@ static int iommu_init_msi(struct amd_iommu *iommu) if (iommu->int_enabled) goto enable_faults; - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + if (iommu->dev->msi_cap) ret = iommu_setup_msi(iommu); else ret = -ENODEV; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index ebd0a4cff049..f417e89e1e7e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -56,9 +56,6 @@ /* Maximum number of mapping groups per SMMU */ #define ARM_SMMU_MAX_SMRS 128 -/* Number of VMIDs per SMMU */ -#define ARM_SMMU_NUM_VMIDS 256 - /* SMMU global address space */ #define ARM_SMMU_GR0(smmu) ((smmu)->base) #define ARM_SMMU_GR1(smmu) ((smmu)->base + (smmu)->pagesize) @@ -87,6 +84,7 @@ #define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6) #define ARM_SMMU_PTE_AP_RDONLY (((pteval_t)2) << 6) #define ARM_SMMU_PTE_ATTRINDX_SHIFT 2 +#define ARM_SMMU_PTE_nG (((pteval_t)1) << 11) /* 
Stage-2 PTE */ #define ARM_SMMU_PTE_HAP_FAULT (((pteval_t)0) << 6) @@ -223,6 +221,7 @@ #define ARM_SMMU_CB_FAR_LO 0x60 #define ARM_SMMU_CB_FAR_HI 0x64 #define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIASID 0x610 #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG (1 << 7) @@ -282,6 +281,8 @@ #define TTBCR2_ADDR_44 4 #define TTBCR2_ADDR_48 5 +#define TTBRn_HI_ASID_SHIFT 16 + #define MAIR_ATTR_SHIFT(n) ((n) << 3) #define MAIR_ATTR_MASK 0xff #define MAIR_ATTR_DEVICE 0x04 @@ -305,7 +306,7 @@ #define FSR_IGN (FSR_AFF | FSR_ASF | FSR_TLBMCF | \ FSR_TLBLKF) #define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF) + FSR_EF | FSR_PF | FSR_TF | FSR_IGN) #define FSYNR0_WNR (1 << 4) @@ -365,21 +366,21 @@ struct arm_smmu_device { u32 num_context_irqs; unsigned int *irqs; - DECLARE_BITMAP(vmid_map, ARM_SMMU_NUM_VMIDS); - struct list_head list; struct rb_root masters; }; struct arm_smmu_cfg { struct arm_smmu_device *smmu; - u8 vmid; u8 cbndx; u8 irptndx; u32 cbar; pgd_t *pgd; }; +#define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) +#define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) + struct arm_smmu_domain { /* * A domain can span across multiple, chained SMMUs and requires @@ -533,6 +534,25 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } +static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg) +{ + struct arm_smmu_device *smmu = cfg->smmu; + void __iomem *base = ARM_SMMU_GR0(smmu); + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + + if (stage1) { + base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + writel_relaxed(ARM_SMMU_CB_ASID(cfg), + base + ARM_SMMU_CB_S1_TLBIASID); + } else { + base = ARM_SMMU_GR0(smmu); + writel_relaxed(ARM_SMMU_CB_VMID(cfg), + base + ARM_SMMU_GR0_TLBIVMID); + } + + arm_smmu_tlb_sync(smmu); +} + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { int flags, ret; @@ -590,6 +610,9 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) void __iomem *gr0_base = 
ARM_SMMU_GR0(smmu); gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + if (!gfsr) + return IRQ_NONE; + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); @@ -601,7 +624,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) gfsr, gfsynr0, gfsynr1, gfsynr2); writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); - return IRQ_NONE; + return IRQ_HANDLED; } static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) @@ -618,14 +641,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); /* CBAR */ - reg = root_cfg->cbar | - (root_cfg->vmid << CBAR_VMID_SHIFT); + reg = root_cfg->cbar; if (smmu->version == 1) reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT; /* Use the weakest memory type, so it is overridden by the pte */ if (stage1) reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); + else + reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT; writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx)); if (smmu->version > 1) { @@ -687,15 +711,11 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) /* TTBR0 */ reg = __pa(root_cfg->pgd); -#ifndef __BIG_ENDIAN writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; + if (stage1) + reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); -#else - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); -#endif /* * TTBCR @@ -750,10 +770,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); } - /* Nuke the TLB */ - writel_relaxed(root_cfg->vmid, gr0_base + 
ARM_SMMU_GR0_TLBIVMID); - arm_smmu_tlb_sync(smmu); - /* SCTLR */ reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; if (stage1) @@ -790,11 +806,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return -ENODEV; } - ret = __arm_smmu_alloc_bitmap(smmu->vmid_map, 0, ARM_SMMU_NUM_VMIDS); - if (IS_ERR_VALUE(ret)) - return ret; - - root_cfg->vmid = ret; if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) { /* * We will likely want to change this if/when KVM gets @@ -813,10 +824,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); if (IS_ERR_VALUE(ret)) - goto out_free_vmid; + return ret; root_cfg->cbndx = ret; - if (smmu->version == 1) { root_cfg->irptndx = atomic_inc_return(&smmu->irptndx); root_cfg->irptndx %= smmu->num_context_irqs; @@ -840,8 +850,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, out_free_context: __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); -out_free_vmid: - __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid); return ret; } @@ -850,17 +858,22 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = domain->priv; struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; struct arm_smmu_device *smmu = root_cfg->smmu; + void __iomem *cb_base; int irq; if (!smmu) return; + /* Disable the context bank and nuke the TLB before freeing it. 
*/ + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); + arm_smmu_tlb_inv_context(root_cfg); + if (root_cfg->irptndx != -1) { irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; free_irq(irq, domain); } - __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid); __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); } @@ -959,6 +972,11 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) static void arm_smmu_domain_destroy(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = domain->priv; + + /* + * Free the domain resources. We assume that all devices have + * already been detached. + */ arm_smmu_destroy_domain_context(domain); arm_smmu_free_pgtables(smmu_domain); kfree(smmu_domain); @@ -1199,7 +1217,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, } if (stage == 1) { - pteval |= ARM_SMMU_PTE_AP_UNPRIV; + pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ)) pteval |= ARM_SMMU_PTE_AP_RDONLY; @@ -1415,13 +1433,9 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, { int ret; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu = root_cfg->smmu; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); - writel_relaxed(root_cfg->vmid, gr0_base + ARM_SMMU_GR0_TLBIVMID); - arm_smmu_tlb_sync(smmu); + arm_smmu_tlb_inv_context(&smmu_domain->root_cfg); return ret ? 
ret : size; } @@ -1544,6 +1558,7 @@ static struct iommu_ops arm_smmu_ops = { static void arm_smmu_device_reset(struct arm_smmu_device *smmu) { void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + void __iomem *sctlr_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB_SCTLR; int i = 0; u32 scr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0); @@ -1553,6 +1568,10 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i)); } + /* Make sure all context banks are disabled */ + for (i = 0; i < smmu->num_context_banks; ++i) + writel_relaxed(0, sctlr_base + ARM_SMMU_CB(smmu, i)); + /* Invalidate the TLB, just in case */ writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL); writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); @@ -1906,7 +1925,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) of_node_put(master->of_node); } - if (!bitmap_empty(smmu->vmid_map, ARM_SMMU_NUM_VMIDS)) + if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_err(dev, "removing device with active domains!\n"); for (i = 0; i < smmu->num_global_irqs; ++i) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 3f32d64ab87a..074018979cdf 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -247,50 +247,6 @@ static void __sysmmu_set_prefbuf(void __iomem *sfrbase, unsigned long base, __raw_writel(size - 1 + base, sfrbase + REG_PB0_EADDR + idx * 8); } -void exynos_sysmmu_set_prefbuf(struct device *dev, - unsigned long base0, unsigned long size0, - unsigned long base1, unsigned long size1) -{ - struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu); - unsigned long flags; - int i; - - BUG_ON((base0 + size0) <= base0); - BUG_ON((size1 > 0) && ((base1 + size1) <= base1)); - - read_lock_irqsave(&data->lock, flags); - if (!is_sysmmu_active(data)) - goto finish; - - for (i = 0; i < data->nsfrs; i++) { - if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) { - if 
(!sysmmu_block(data->sfrbases[i])) - continue; - - if (size1 == 0) { - if (size0 <= SZ_128K) { - base1 = base0; - size1 = size0; - } else { - size1 = size0 - - ALIGN(size0 / 2, SZ_64K); - size0 = size0 - size1; - base1 = base0 + size0; - } - } - - __sysmmu_set_prefbuf( - data->sfrbases[i], base0, size0, 0); - __sysmmu_set_prefbuf( - data->sfrbases[i], base1, size1, 1); - - sysmmu_unblock(data->sfrbases[i]); - } - } -finish: - read_unlock_irqrestore(&data->lock, flags); -} - static void __set_fault_handler(struct sysmmu_drvdata *data, sysmmu_fault_handler_t handler) { diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c new file mode 100644 index 000000000000..cba0498eb011 --- /dev/null +++ b/drivers/iommu/fsl_pamu.c @@ -0,0 +1,1309 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * + */ + +#define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/genalloc.h> +#include <asm/io.h> +#include <asm/bitops.h> +#include <asm/fsl_guts.h> + +#include "fsl_pamu.h" + +/* define indexes for each operation mapping scenario */ +#define OMI_QMAN 0x00 +#define OMI_FMAN 0x01 +#define OMI_QMAN_PRIV 0x02 +#define OMI_CAAM 0x03 + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +struct pamu_isr_data { + void __iomem *pamu_reg_base; /* Base address of PAMU regs*/ + unsigned int count; /* The number of PAMUs */ +}; + +static struct paace *ppaact; +static struct paace *spaact; +static struct ome *omt; + +/* + * Table for matching compatible strings, for device tree + * guts node, for QorIQ SOCs. + * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4 + * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" + * string would be used. +*/ +static const struct of_device_id guts_device_ids[] = { + { .compatible = "fsl,qoriq-device-config-1.0", }, + { .compatible = "fsl,qoriq-device-config-2.0", }, + {} +}; + + +/* + * Table for matching compatible strings, for device tree + * L3 cache controller node. + * "fsl,t4240-l3-cache-controller" corresponds to T4, + * "fsl,b4860-l3-cache-controller" corresponds to B4 & + * "fsl,p4080-l3-cache-controller" corresponds to other, + * SOCs. 
+*/ +static const struct of_device_id l3_device_ids[] = { + { .compatible = "fsl,t4240-l3-cache-controller", }, + { .compatible = "fsl,b4860-l3-cache-controller", }, + { .compatible = "fsl,p4080-l3-cache-controller", }, + {} +}; + +/* maximum subwindows permitted per liodn */ +static u32 max_subwindow_count; + +/* Pool for fspi allocation */ +struct gen_pool *spaace_pool; + +/** + * pamu_get_max_subwin_cnt() - Return the maximum supported + * subwindow count per liodn. + * + */ +u32 pamu_get_max_subwin_cnt() +{ + return max_subwindow_count; +} + +/** + * pamu_get_ppaace() - Return the primary PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns the ppace pointer upon success else return + * null. + */ +static struct paace *pamu_get_ppaace(int liodn) +{ + if (!ppaact || liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("PPAACT doesn't exist\n"); + return NULL; + } + + return &ppaact[liodn]; +} + +/** + * pamu_enable_liodn() - Set valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_enable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) { + pr_debug("liodn %d not configured\n", liodn); + return -EINVAL; + } + + /* Ensure that all other stores to the ppaace complete first */ + mb(); + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + mb(); + + return 0; +} + +/** + * pamu_disable_liodn() - Clears valid bit of PACCE + * @liodn: liodn PAACT index for desired PAACE + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_disable_liodn(int liodn) +{ + struct paace *ppaace; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid primary paace entry\n"); + return -ENOENT; + } + + set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); + mb(); + + return 
0; +} + +/* Derive the window size encoding for a particular PAACE entry */ +static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) +{ + /* Bug if not a power of 2 */ + BUG_ON(!is_power_of_2(addrspace_size)); + + /* window size is 2^(WSE+1) bytes */ + return __ffs(addrspace_size) - 1; +} + +/* Derive the PAACE window count encoding for the subwindow count */ +static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) +{ + /* window count is 2^(WCE+1) bytes */ + return __ffs(subwindow_cnt) - 1; +} + +/* + * Set the PAACE type as primary and set the coherency required domain + * attribute + */ +static void pamu_init_ppaace(struct paace *ppaace) +{ + set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY); + + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Set the PAACE type as secondary and set the coherency required domain + * attribute. + */ +static void pamu_init_spaace(struct paace *spaace) +{ + set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY); + set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + PAACE_M_COHERENCE_REQ); +} + +/* + * Return the spaace (corresponding to the secondary window index) + * for a particular ppaace. + */ +static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) +{ + u32 subwin_cnt; + struct paace *spaace = NULL; + + subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1); + + if (wnum < subwin_cnt) + spaace = &spaact[paace->fspi + wnum]; + else + pr_debug("secondary paace out of bounds\n"); + + return spaace; +} + +/** + * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows + * required for primary PAACE in the secondary + * PAACE table. + * @subwin_cnt: Number of subwindows to be reserved. + * + * A PPAACE entry may have a number of associated subwindows. A subwindow + * corresponds to a SPAACE entry in the SPAACT table. 
Each PAACE entry stores + * the index (fspi) of the first SPAACE entry in the SPAACT table. This + * function returns the index of the first SPAACE entry. The remaining + * SPAACE entries are reserved contiguously from that index. + * + * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success. + * If no SPAACE entry is available or the allocator can not reserve the required + * number of contiguous entries function returns ULONG_MAX indicating a failure. + * +*/ +static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) +{ + unsigned long spaace_addr; + + spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace)); + if (!spaace_addr) + return ULONG_MAX; + + return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); +} + +/* Release the subwindows reserved for a particular LIODN */ +void pamu_free_subwins(int liodn) +{ + struct paace *ppaace; + u32 subwin_cnt, size; + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + pr_debug("Invalid liodn entry\n"); + return; + } + + if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) { + subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1); + size = (subwin_cnt - 1) * sizeof(struct paace); + gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } +} + +/* + * Function used for updating stash destination for the coressponding + * LIODN. 
+ */ +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + } + set_bf(paace->impl_attr, PAACE_IA_CID, value); + + mb(); + + return 0; +} + +/* Disable a subwindow corresponding to the LIODN */ +int pamu_disable_spaace(int liodn, u32 subwin) +{ + struct paace *paace; + + paace = pamu_get_ppaace(liodn); + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + if (subwin) { + paace = pamu_get_spaace(paace, subwin - 1); + if (!paace) { + return -ENOENT; + } + set_bf(paace->addr_bitfields, PAACE_AF_V, + PAACE_V_INVALID); + } else { + set_bf(paace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_DENIED); + } + + mb(); + + return 0; +} + + +/** + * pamu_config_paace() - Sets up PPAACE entry for specified liodn + * + * @liodn: Logical IO device number + * @win_addr: starting address of DSA window + * @win-size: size of DSA window + * @omi: Operation mapping index -- if ~omi == 0 then omi not defined + * @rpn: real (true physical) page number + * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then + * stashid not defined + * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @subwin_cnt: number of sub-windows + * @prot: window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, u32 stashid, + u32 subwin_cnt, int prot) +{ + struct paace *ppaace; + unsigned long fspi; + + if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { + pr_debug("window size too small or not a power of two %llx\n", win_size); + return -EINVAL; + } + + if (win_addr & (win_size - 1)) { + pr_debug("window address is not aligned with 
window size\n"); + return -EINVAL; + } + + ppaace = pamu_get_ppaace(liodn); + if (!ppaace) { + return -ENOENT; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, + map_addrspace_size_to_wse(win_size)); + + pamu_init_ppaace(ppaace); + + ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20); + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, + (win_addr >> PAMU_PAGE_SHIFT)); + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + /* configure stash id */ + if (~stashid != 0) + set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid); + + /* configure snoop id */ + if (~snoopid != 0) + ppaace->domain_attr.to_host.snpid = snoopid; + + if (subwin_cnt) { + /* The first entry is in the primary PAACE instead */ + fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1); + if (fspi == ULONG_MAX) { + pr_debug("spaace indexes exhausted\n"); + return -EINVAL; + } + + /* window count is 2^(WCE+1) bytes */ + set_bf(ppaace->impl_attr, PAACE_IA_WCE, + map_subwindow_cnt_to_wce(subwin_cnt)); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1); + ppaace->fspi = fspi; + } else { + set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + ppaace->twbah = rpn >> 20; + set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot); + set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0); + set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0); + } + mb(); + + return 0; +} + +/** + * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow + * + * @liodn: Logical IO device number + * @subwin_cnt: number of sub-windows associated with dma-window + * @subwin: subwindow index + * @subwin_size: size of subwindow + * @omi: Operation mapping index + * @rpn: real (true physical) page number + * 
@snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then + * snoopid not defined + * @stashid: cache stash id for associated cpu + * @enable: enable/disable subwindow after reconfiguration + * @prot: sub window permissions + * + * Returns 0 upon success else error code < 0 returned + */ +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + u32 snoopid, u32 stashid, int enable, int prot) +{ + struct paace *paace; + + + /* setup sub-windows */ + if (!subwin_cnt) { + pr_debug("Invalid subwindow count\n"); + return -EINVAL; + } + + paace = pamu_get_ppaace(liodn); + if (subwin > 0 && subwin < subwin_cnt && paace) { + paace = pamu_get_spaace(paace, subwin - 1); + + if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) { + pamu_init_spaace(paace); + set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn); + } + } + + if (!paace) { + pr_debug("Invalid liodn entry\n"); + return -ENOENT; + } + + if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { + pr_debug("subwindow size out of range, or not a power of 2\n"); + return -EINVAL; + } + + if (rpn == ULONG_MAX) { + pr_debug("real page number out of range\n"); + return -EINVAL; + } + + /* window size is 2^(WSE+1) bytes */ + set_bf(paace->win_bitfields, PAACE_WIN_SWSE, + map_addrspace_size_to_wse(subwin_size)); + + set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE); + paace->twbah = rpn >> 20; + set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn); + set_bf(paace->addr_bitfields, PAACE_AF_AP, prot); + + /* configure snoop id */ + if (~snoopid != 0) + paace->domain_attr.to_host.snpid = snoopid; + + /* set up operation mapping if it's configured */ + if (omi < OME_NUMBER_ENTRIES) { + set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + paace->op_encode.index_ot.omi = omi; + } else if (~omi != 0) { + pr_debug("bad operation mapping index: %d\n", omi); + return -EINVAL; + } + + if (~stashid != 0) + set_bf(paace->impl_attr, 
PAACE_IA_CID, stashid); + + smp_wmb(); + + if (enable) + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID); + + mb(); + + return 0; +} + +/** +* get_ome_index() - Returns the index in the operation mapping table +* for device. +* @*omi_index: pointer for storing the index value +* +*/ +void get_ome_index(u32 *omi_index, struct device *dev) +{ + if (of_device_is_compatible(dev->of_node, "fsl,qman-portal")) + *omi_index = OMI_QMAN; + if (of_device_is_compatible(dev->of_node, "fsl,qman")) + *omi_index = OMI_QMAN_PRIV; +} + +/** + * get_stash_id - Returns stash destination id corresponding to a + * cache type and vcpu. + * @stash_dest_hint: L1, L2 or L3 + * @vcpu: vpcu target for a particular cache type. + * + * Returs stash on success or ~(u32)0 on failure. + * + */ +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) +{ + const u32 *prop; + struct device_node *node; + u32 cache_level; + int len, found = 0; + int i; + + /* Fastpath, exit early if L3/CPC cache is target for stashing */ + if (stash_dest_hint == PAMU_ATTR_CACHE_L3) { + node = of_find_matching_node(NULL, l3_device_ids); + if (node) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return be32_to_cpup(prop); + } + return ~(u32)0; + } + + for_each_node_by_type(node, "cpu") { + prop = of_get_property(node, "reg", &len); + for (i = 0; i < len / sizeof(u32); i++) { + if (be32_to_cpup(&prop[i]) == vcpu) { + found = 1; + goto found_cpu_node; + } + } + } +found_cpu_node: + + /* find the hwnode that represents the cache */ + for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) { + if (stash_dest_hint == cache_level) { + prop = of_get_property(node, "cache-stash-id", 0); + if (!prop) { + pr_debug("missing cache-stash-id at %s\n", node->full_name); + of_node_put(node); + return ~(u32)0; + } + of_node_put(node); + return 
be32_to_cpup(prop); + } + + prop = of_get_property(node, "next-level-cache", 0); + if (!prop) { + pr_debug("can't find next-level-cache at %s\n", + node->full_name); + of_node_put(node); + return ~(u32)0; /* can't traverse any further */ + } + of_node_put(node); + + /* advance to next node in cache hierarchy */ + node = of_find_node_by_phandle(*prop); + if (!node) { + pr_debug("Invalid node for cache hierarchy %s\n", + node->full_name); + return ~(u32)0; + } + } + + pr_debug("stash dest not found for %d on vcpu %d\n", + stash_dest_hint, vcpu); + return ~(u32)0; +} + +/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */ +#define QMAN_PAACE 1 +#define QMAN_PORTAL_PAACE 2 +#define BMAN_PAACE 3 + +/** + * Setup operation mapping and stash destinations for QMAN and QMAN portal. + * Memory accesses to QMAN and BMAN private memory need not be coherent, so + * clear the PAACE entry coherency attribute for them. + */ +static void setup_qbman_paace(struct paace *ppaace, int paace_type) +{ + switch (paace_type) { + case QMAN_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV; + /* setup QMAN Private data stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + case QMAN_PORTAL_PAACE: + set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); + ppaace->op_encode.index_ot.omi = OMI_QMAN; + /*Set DQRR and Frame stashing for the L3 cache */ + set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); + break; + case BMAN_PAACE: + set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR, + 0); + break; + } +} + +/** + * Setup the operation mapping table for various devices. This is a static + * table where each table index corresponds to a particular device. 
PAMU uses + * this table to translate device transaction to appropriate corenet + * transaction. + */ +static void __init setup_omt(struct ome *omt) +{ + struct ome *ome; + + /* Configure OMI_QMAN */ + ome = &omt[OMI_QMAN]; + + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO; + + ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC; + ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE; + + /* Configure OMI_FMAN */ + ome = &omt[OMI_FMAN]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + + /* Configure OMI_QMAN private */ + ome = &omt[OMI_QMAN_PRIV]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; + ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA; + ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA; + + /* Configure OMI_CAAM */ + ome = &omt[OMI_CAAM]; + ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READI; + ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE; +} + +/* + * Get the maximum number of PAACT table entries + * and subwindows supported by PAMU + */ +static void get_pamu_cap_values(unsigned long pamu_reg_base) +{ + u32 pc_val; + + pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3)); + /* Maximum number of subwindows per liodn */ + max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val)); +} + +/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ +int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) +{ + u32 *pc; + struct pamu_mmap_regs *pamu_regs; + + pc = (u32 *) (pamu_reg_base + PAMU_PC); + pamu_regs = (struct pamu_mmap_regs *) + (pamu_reg_base + PAMU_MMAP_REGS_BASE); + + /* set up pointers to corenet control blocks */ + + out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->ppbal, 
lower_32_bits(ppaact_phys)); + ppaact_phys = ppaact_phys + PAACT_SIZE; + out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys)); + out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys)); + + out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys)); + spaact_phys = spaact_phys + SPAACT_SIZE; + out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys)); + out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys)); + + out_be32(&pamu_regs->obah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->obal, lower_32_bits(omt_phys)); + omt_phys = omt_phys + OMT_SIZE; + out_be32(&pamu_regs->olah, upper_32_bits(omt_phys)); + out_be32(&pamu_regs->olal, lower_32_bits(omt_phys)); + + /* + * set PAMU enable bit, + * allow ppaact & omt to be cached + * & enable PAMU access violation interrupts. + */ + + out_be32((u32 *)(pamu_reg_base + PAMU_PICS), + PAMU_ACCESS_VIOLATION_ENABLE); + out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC); + return 0; +} + +/* Enable all device LIODNS */ +static void __init setup_liodns(void) +{ + int i, len; + struct paace *ppaace; + struct device_node *node = NULL; + const u32 *prop; + + for_each_node_with_property(node, "fsl,liodn") { + prop = of_get_property(node, "fsl,liodn", &len); + for (i = 0; i < len / sizeof(u32); i++) { + int liodn; + + liodn = be32_to_cpup(&prop[i]); + if (liodn >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid LIODN value %d\n", liodn); + continue; + } + ppaace = pamu_get_ppaace(liodn); + pamu_init_ppaace(ppaace); + /* window size is 2^(WSE+1) bytes */ + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35); + ppaace->wbah = 0; + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); + set_bf(ppaace->impl_attr, PAACE_IA_ATM, + PAACE_ATM_NO_XLATE); + set_bf(ppaace->addr_bitfields, PAACE_AF_AP, + PAACE_AP_PERMS_ALL); + if (of_device_is_compatible(node, "fsl,qman-portal")) + setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE); + if (of_device_is_compatible(node, "fsl,qman")) + 
setup_qbman_paace(ppaace, QMAN_PAACE); + if (of_device_is_compatible(node, "fsl,bman")) + setup_qbman_paace(ppaace, BMAN_PAACE); + mb(); + pamu_enable_liodn(liodn); + } + } +} + +irqreturn_t pamu_av_isr(int irq, void *arg) +{ + struct pamu_isr_data *data = arg; + phys_addr_t phys; + unsigned int i, j, ret; + + pr_emerg("access violation interrupt\n"); + + for (i = 0; i < data->count; i++) { + void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET; + u32 pics = in_be32(p + PAMU_PICS); + + if (pics & PAMU_ACCESS_VIOLATION_STAT) { + u32 avs1 = in_be32(p + PAMU_AVS1); + struct paace *paace; + + pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1)); + pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2)); + pr_emerg("AVS1=%08x\n", avs1); + pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2)); + pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH), + in_be32(p + PAMU_AVAL))); + pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD)); + pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL))); + + phys = make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL)); + + /* Assume that POEA points to a PAACE */ + if (phys) { + u32 *paace = phys_to_virt(phys); + + /* Only the first four words are relevant */ + for (j = 0; j < 4; j++) + pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j)); + } + + /* clear access violation condition */ + out_be32((p + PAMU_AVS1), avs1 & PAMU_AV_MASK); + paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(!paace); + /* check if we got a violation for a disabled LIODN */ + if (!get_bf(paace->addr_bitfields, PAACE_AF_V)) { + /* + * As per hardware erratum A-003638, access + * violation can be reported for a disabled + * LIODN. If we hit that condition, disable + * access violation reporting. 
+ */ + pics &= ~PAMU_ACCESS_VIOLATION_ENABLE; + } else { + /* Disable the LIODN */ + ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT); + BUG_ON(ret); + pr_emerg("Disabling liodn %x\n", avs1 >> PAMU_AVS1_LIODN_SHIFT); + } + out_be32((p + PAMU_PICS), pics); + } + } + + + return IRQ_HANDLED; +} + +#define LAWAR_EN 0x80000000 +#define LAWAR_TARGET_MASK 0x0FF00000 +#define LAWAR_TARGET_SHIFT 20 +#define LAWAR_SIZE_MASK 0x0000003F +#define LAWAR_CSDID_MASK 0x000FF000 +#define LAWAR_CSDID_SHIFT 12 + +#define LAW_SIZE_4K 0xb + +struct ccsr_law { + u32 lawbarh; /* LAWn base address high */ + u32 lawbarl; /* LAWn base address low */ + u32 lawar; /* LAWn attributes */ + u32 reserved; +}; + +/* + * Create a coherence subdomain for a given memory block. + */ +static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) +{ + struct device_node *np; + const __be32 *iprop; + void __iomem *lac = NULL; /* Local Access Control registers */ + struct ccsr_law __iomem *law; + void __iomem *ccm = NULL; + u32 __iomem *csdids; + unsigned int i, num_laws, num_csds; + u32 law_target = 0; + u32 csd_id = 0; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law"); + if (!np) + return -ENODEV; + + iprop = of_get_property(np, "fsl,num-laws", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_laws = be32_to_cpup(iprop); + if (!num_laws) { + ret = -ENODEV; + goto error; + } + + lac = of_iomap(np, 0); + if (!lac) { + ret = -ENODEV; + goto error; + } + + /* LAW registers are at offset 0xC00 */ + law = lac + 0xC00; + + of_node_put(np); + + np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf"); + if (!np) { + ret = -ENODEV; + goto error; + } + + iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL); + if (!iprop) { + ret = -ENODEV; + goto error; + } + + num_csds = be32_to_cpup(iprop); + if (!num_csds) { + ret = -ENODEV; + goto error; + } + + ccm = of_iomap(np, 0); + if (!ccm) { + ret = -ENOMEM; + goto error; + } + + /* The undocumented 
CSDID registers are at offset 0x600 */ + csdids = ccm + 0x600; + + of_node_put(np); + np = NULL; + + /* Find an unused coherence subdomain ID */ + for (csd_id = 0; csd_id < num_csds; csd_id++) { + if (!csdids[csd_id]) + break; + } + + /* Store the Port ID in the (undocumented) proper CIDMRxx register */ + csdids[csd_id] = csd_port_id; + + /* Find the DDR LAW that maps to our buffer. */ + for (i = 0; i < num_laws; i++) { + if (law[i].lawar & LAWAR_EN) { + phys_addr_t law_start, law_end; + + law_start = make64(law[i].lawbarh, law[i].lawbarl); + law_end = law_start + + (2ULL << (law[i].lawar & LAWAR_SIZE_MASK)); + + if (law_start <= phys && phys < law_end) { + law_target = law[i].lawar & LAWAR_TARGET_MASK; + break; + } + } + } + + if (i == 0 || i == num_laws) { + /* This should never happen*/ + ret = -ENOENT; + goto error; + } + + /* Find a free LAW entry */ + while (law[--i].lawar & LAWAR_EN) { + if (i == 0) { + /* No higher priority LAW slots available */ + ret = -ENOENT; + goto error; + } + } + + law[i].lawbarh = upper_32_bits(phys); + law[i].lawbarl = lower_32_bits(phys); + wmb(); + law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) | + (LAW_SIZE_4K + get_order(size)); + wmb(); + +error: + if (ccm) + iounmap(ccm); + + if (lac) + iounmap(lac); + + if (np) + of_node_put(np); + + return ret; +} + +/* + * Table of SVRs and the corresponding PORT_ID values. Port ID corresponds to a + * bit map of snoopers for a given range of memory mapped by a LAW. + * + * All future CoreNet-enabled SOCs will have this erratum(A-004510) fixed, so this + * table should never need to be updated. SVRs are guaranteed to be unique, so + * there is no worry that a future SOC will inadvertently have one of these + * values. 
+ */ +static const struct { + u32 svr; + u32 port_id; +} port_id_map[] = { + {0x82100010, 0xFF000000}, /* P2040 1.0 */ + {0x82100011, 0xFF000000}, /* P2040 1.1 */ + {0x82100110, 0xFF000000}, /* P2041 1.0 */ + {0x82100111, 0xFF000000}, /* P2041 1.1 */ + {0x82110310, 0xFF000000}, /* P3041 1.0 */ + {0x82110311, 0xFF000000}, /* P3041 1.1 */ + {0x82010020, 0xFFF80000}, /* P4040 2.0 */ + {0x82000020, 0xFFF80000}, /* P4080 2.0 */ + {0x82210010, 0xFC000000}, /* P5010 1.0 */ + {0x82210020, 0xFC000000}, /* P5010 2.0 */ + {0x82200010, 0xFC000000}, /* P5020 1.0 */ + {0x82050010, 0xFF800000}, /* P5021 1.0 */ + {0x82040010, 0xFF800000}, /* P5040 1.0 */ +}; + +#define SVR_SECURITY 0x80000 /* The Security (E) bit */ + +static int __init fsl_pamu_probe(struct platform_device *pdev) +{ + void __iomem *pamu_regs = NULL; + struct ccsr_guts __iomem *guts_regs = NULL; + u32 pamubypenr, pamu_counter; + unsigned long pamu_reg_off; + unsigned long pamu_reg_base; + struct pamu_isr_data *data = NULL; + struct device_node *guts_node; + u64 size; + struct page *p; + int ret = 0; + int irq; + phys_addr_t ppaact_phys; + phys_addr_t spaact_phys; + phys_addr_t omt_phys; + size_t mem_size = 0; + unsigned int order = 0; + u32 csd_port_id = 0; + unsigned i; + /* + * enumerate all PAMUs and allocate and setup PAMU tables + * for each of them, + * NOTE : All PAMUs share the same LIODN tables. 
+ */ + + pamu_regs = of_iomap(pdev->dev.of_node, 0); + if (!pamu_regs) { + dev_err(&pdev->dev, "ioremap of PAMU node failed\n"); + return -ENOMEM; + } + of_get_address(pdev->dev.of_node, 0, &size, NULL); + + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (irq == NO_IRQ) { + dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n"); + goto error; + } + + data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, "PAMU isr data memory allocation failed\n"); + ret = -ENOMEM; + goto error; + } + data->pamu_reg_base = pamu_regs; + data->count = size / PAMU_OFFSET; + + /* The ISR needs access to the regs, so we won't iounmap them */ + ret = request_irq(irq, pamu_av_isr, 0, "pamu", data); + if (ret < 0) { + dev_err(&pdev->dev, "error %i installing ISR for irq %i\n", + ret, irq); + goto error; + } + + guts_node = of_find_matching_node(NULL, guts_device_ids); + if (!guts_node) { + dev_err(&pdev->dev, "could not find GUTS node %s\n", + pdev->dev.of_node->full_name); + ret = -ENODEV; + goto error; + } + + guts_regs = of_iomap(guts_node, 0); + of_node_put(guts_node); + if (!guts_regs) { + dev_err(&pdev->dev, "ioremap of GUTS node failed\n"); + ret = -ENODEV; + goto error; + } + + /* read in the PAMU capability registers */ + get_pamu_cap_values((unsigned long)pamu_regs); + /* + * To simplify the allocation of a coherency domain, we allocate the + * PAACT and the OMT in the same memory buffer. Unfortunately, this + * wastes more memory compared to allocating the buffers separately. 
+ */ + /* Determine how much memory we need */ + mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) + + (PAGE_SIZE << get_order(SPAACT_SIZE)) + + (PAGE_SIZE << get_order(OMT_SIZE)); + order = get_order(mem_size); + + p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!p) { + dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n"); + ret = -ENOMEM; + goto error; + } + + ppaact = page_address(p); + ppaact_phys = page_to_phys(p); + + /* Make sure the memory is naturally aligned */ + if (ppaact_phys & ((PAGE_SIZE << order) - 1)) { + dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n"); + ret = -ENOMEM; + goto error; + } + + spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); + omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); + + dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact, + (unsigned long long) ppaact_phys); + + /* Check to see if we need to implement the work-around on this SOC */ + + /* Determine the Port ID for our coherence subdomain */ + for (i = 0; i < ARRAY_SIZE(port_id_map); i++) { + if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) { + csd_port_id = port_id_map[i].port_id; + dev_dbg(&pdev->dev, "found matching SVR %08x\n", + port_id_map[i].svr); + break; + } + } + + if (csd_port_id) { + dev_dbg(&pdev->dev, "creating coherency subdomain at address " + "0x%llx, size %zu, port id 0x%08x", ppaact_phys, + mem_size, csd_port_id); + + ret = create_csd(ppaact_phys, mem_size, csd_port_id); + if (ret) { + dev_err(&pdev->dev, "could not create coherence " + "subdomain\n"); + return ret; + } + } + + spaact_phys = virt_to_phys(spaact); + omt_phys = virt_to_phys(omt); + + spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); + if (!spaace_pool) { + ret = -ENOMEM; + dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n"); + goto error; + } + + ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1); + if (ret) + goto error_genpool; + + pamubypenr = 
in_be32(&guts_regs->pamubypenr); + + for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; + pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) { + + pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off; + setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys, + spaact_phys, omt_phys); + /* Disable PAMU bypass for this PAMU */ + pamubypenr &= ~pamu_counter; + } + + setup_omt(omt); + + /* Enable all relevant PAMU(s) */ + out_be32(&guts_regs->pamubypenr, pamubypenr); + + iounmap(guts_regs); + + /* Enable DMA for the LIODNs in the device tree*/ + + setup_liodns(); + + return 0; + +error_genpool: + gen_pool_destroy(spaace_pool); + +error: + if (irq != NO_IRQ) + free_irq(irq, data); + + if (data) { + memset(data, 0, sizeof(struct pamu_isr_data)); + kfree(data); + } + + if (pamu_regs) + iounmap(pamu_regs); + + if (guts_regs) + iounmap(guts_regs); + + if (ppaact) + free_pages((unsigned long)ppaact, order); + + ppaact = NULL; + + return ret; +} + +static const struct of_device_id fsl_of_pamu_ids[] = { + { + .compatible = "fsl,p4080-pamu", + }, + { + .compatible = "fsl,pamu", + }, + {}, +}; + +static struct platform_driver fsl_of_pamu_driver = { + .driver = { + .name = "fsl-of-pamu", + .owner = THIS_MODULE, + }, + .probe = fsl_pamu_probe, +}; + +static __init int fsl_pamu_init(void) +{ + struct platform_device *pdev = NULL; + struct device_node *np; + int ret; + + /* + * The normal OF process calls the probe function at some + * indeterminate later time, after most drivers have loaded. This is + * too late for us, because PAMU clients (like the Qman driver) + * depend on PAMU being initialized early. + * + * So instead, we "manually" call our probe function by creating the + * platform devices ourselves. + */ + + /* + * We assume that there is only one PAMU node in the device tree. A + * single PAMU node represents all of the PAMU devices in the SOC + * already. 
Everything else already makes that assumption, and the + * binding for the PAMU nodes doesn't allow for any parent-child + * relationships anyway. In other words, support for more than one + * PAMU node would require significant changes to a lot of code. + */ + + np = of_find_compatible_node(NULL, NULL, "fsl,pamu"); + if (!np) { + pr_err("could not find a PAMU node\n"); + return -ENODEV; + } + + ret = platform_driver_register(&fsl_of_pamu_driver); + if (ret) { + pr_err("could not register driver (err=%i)\n", ret); + goto error_driver_register; + } + + pdev = platform_device_alloc("fsl-of-pamu", 0); + if (!pdev) { + pr_err("could not allocate device %s\n", + np->full_name); + ret = -ENOMEM; + goto error_device_alloc; + } + pdev->dev.of_node = of_node_get(np); + + ret = pamu_domain_init(); + if (ret) + goto error_device_add; + + ret = platform_device_add(pdev); + if (ret) { + pr_err("could not add device %s (err=%i)\n", + np->full_name, ret); + goto error_device_add; + } + + return 0; + +error_device_add: + of_node_put(pdev->dev.of_node); + pdev->dev.of_node = NULL; + + platform_device_put(pdev); + +error_device_alloc: + platform_driver_unregister(&fsl_of_pamu_driver); + +error_driver_register: + of_node_put(np); + + return ret; +} +arch_initcall(fsl_pamu_init); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h new file mode 100644 index 000000000000..8fc1a125b16e --- /dev/null +++ b/drivers/iommu/fsl_pamu.h @@ -0,0 +1,410 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_H +#define __FSL_PAMU_H + +#include <asm/fsl_pamu_stash.h> + +/* Bit Field macros + * v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load + */ +#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m))) +#define get_bf(v, m) (((v) & (m)) >> (m##_SHIFT)) + +/* PAMU CCSR space */ +#define PAMU_PGC 0x00000000 /* Allows all peripheral accesses */ +#define PAMU_PE 0x40000000 /* enable PAMU */ + +/* PAMU_OFFSET to the next pamu space in ccsr */ +#define PAMU_OFFSET 0x1000 + +#define PAMU_MMAP_REGS_BASE 0 + +struct pamu_mmap_regs { + u32 ppbah; + u32 ppbal; + u32 pplah; + u32 pplal; + u32 spbah; + u32 spbal; + u32 splah; + u32 splal; + u32 obah; + u32 obal; + u32 olah; + u32 olal; +}; + +/* PAMU Error Registers */ +#define PAMU_POES1 0x0040 +#define PAMU_POES2 0x0044 +#define PAMU_POEAH 0x0048 +#define PAMU_POEAL 0x004C +#define PAMU_AVS1 0x0050 +#define PAMU_AVS1_AV 0x1 +#define PAMU_AVS1_OTV 0x6 +#define PAMU_AVS1_APV 0x78 +#define PAMU_AVS1_WAV 0x380 +#define PAMU_AVS1_LAV 0x1c00 +#define PAMU_AVS1_GCV 0x2000 +#define PAMU_AVS1_PDV 0x4000 +#define PAMU_AV_MASK (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \ + | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) +#define PAMU_AVS1_LIODN_SHIFT 16 +#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400 + +#define PAMU_AVS2 0x0054 +#define PAMU_AVAH 0x0058 +#define PAMU_AVAL 0x005C +#define PAMU_EECTL 0x0060 +#define PAMU_EEDIS 0x0064 +#define PAMU_EEINTEN 0x0068 +#define PAMU_EEDET 0x006C +#define PAMU_EEATTR 0x0070 +#define PAMU_EEAHI 0x0074 +#define PAMU_EEALO 0x0078 +#define PAMU_EEDHI 0X007C +#define PAMU_EEDLO 0x0080 +#define PAMU_EECC 0x0084 +#define PAMU_UDAD 0x0090 + +/* PAMU 
Revision Registers */ +#define PAMU_PR1 0x0BF8 +#define PAMU_PR2 0x0BFC + +/* PAMU version mask */ +#define PAMU_PR1_MASK 0xffff + +/* PAMU Capabilities Registers */ +#define PAMU_PC1 0x0C00 +#define PAMU_PC2 0x0C04 +#define PAMU_PC3 0x0C08 +#define PAMU_PC4 0x0C0C + +/* PAMU Control Register */ +#define PAMU_PC 0x0C10 + +/* PAMU control defs */ +#define PAMU_CONTROL 0x0C10 +#define PAMU_PC_PGC 0x80000000 /* PAMU gate closed bit */ +#define PAMU_PC_PE 0x40000000 /* PAMU enable bit */ +#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */ +#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */ +#define PAMU_PC_OCE 0x00001000 /* OMT cache enable */ + +#define PAMU_PFA1 0x0C14 +#define PAMU_PFA2 0x0C18 + +#define PAMU_PC2_MLIODN(X) ((X) >> 16) +#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf) + +/* PAMU Interrupt control and Status Register */ +#define PAMU_PICS 0x0C1C +#define PAMU_ACCESS_VIOLATION_STAT 0x8 +#define PAMU_ACCESS_VIOLATION_ENABLE 0x4 + +/* PAMU Debug Registers */ +#define PAMU_PD1 0x0F00 +#define PAMU_PD2 0x0F04 +#define PAMU_PD3 0x0F08 +#define PAMU_PD4 0x0F0C + +#define PAACE_AP_PERMS_DENIED 0x0 +#define PAACE_AP_PERMS_QUERY 0x1 +#define PAACE_AP_PERMS_UPDATE 0x2 +#define PAACE_AP_PERMS_ALL 0x3 + +#define PAACE_DD_TO_HOST 0x0 +#define PAACE_DD_TO_IO 0x1 +#define PAACE_PT_PRIMARY 0x0 +#define PAACE_PT_SECONDARY 0x1 +#define PAACE_V_INVALID 0x0 +#define PAACE_V_VALID 0x1 +#define PAACE_MW_SUBWINDOWS 0x1 + +#define PAACE_WSE_4K 0xB +#define PAACE_WSE_8K 0xC +#define PAACE_WSE_16K 0xD +#define PAACE_WSE_32K 0xE +#define PAACE_WSE_64K 0xF +#define PAACE_WSE_128K 0x10 +#define PAACE_WSE_256K 0x11 +#define PAACE_WSE_512K 0x12 +#define PAACE_WSE_1M 0x13 +#define PAACE_WSE_2M 0x14 +#define PAACE_WSE_4M 0x15 +#define PAACE_WSE_8M 0x16 +#define PAACE_WSE_16M 0x17 +#define PAACE_WSE_32M 0x18 +#define PAACE_WSE_64M 0x19 +#define PAACE_WSE_128M 0x1A +#define PAACE_WSE_256M 0x1B +#define PAACE_WSE_512M 0x1C +#define PAACE_WSE_1G 0x1D +#define PAACE_WSE_2G 0x1E 
+#define PAACE_WSE_4G 0x1F + +#define PAACE_DID_PCI_EXPRESS_1 0x00 +#define PAACE_DID_PCI_EXPRESS_2 0x01 +#define PAACE_DID_PCI_EXPRESS_3 0x02 +#define PAACE_DID_PCI_EXPRESS_4 0x03 +#define PAACE_DID_LOCAL_BUS 0x04 +#define PAACE_DID_SRIO 0x0C +#define PAACE_DID_MEM_1 0x10 +#define PAACE_DID_MEM_2 0x11 +#define PAACE_DID_MEM_3 0x12 +#define PAACE_DID_MEM_4 0x13 +#define PAACE_DID_MEM_1_2 0x14 +#define PAACE_DID_MEM_3_4 0x15 +#define PAACE_DID_MEM_1_4 0x16 +#define PAACE_DID_BM_SW_PORTAL 0x18 +#define PAACE_DID_PAMU 0x1C +#define PAACE_DID_CAAM 0x21 +#define PAACE_DID_QM_SW_PORTAL 0x3C +#define PAACE_DID_CORE0_INST 0x80 +#define PAACE_DID_CORE0_DATA 0x81 +#define PAACE_DID_CORE1_INST 0x82 +#define PAACE_DID_CORE1_DATA 0x83 +#define PAACE_DID_CORE2_INST 0x84 +#define PAACE_DID_CORE2_DATA 0x85 +#define PAACE_DID_CORE3_INST 0x86 +#define PAACE_DID_CORE3_DATA 0x87 +#define PAACE_DID_CORE4_INST 0x88 +#define PAACE_DID_CORE4_DATA 0x89 +#define PAACE_DID_CORE5_INST 0x8A +#define PAACE_DID_CORE5_DATA 0x8B +#define PAACE_DID_CORE6_INST 0x8C +#define PAACE_DID_CORE6_DATA 0x8D +#define PAACE_DID_CORE7_INST 0x8E +#define PAACE_DID_CORE7_DATA 0x8F +#define PAACE_DID_BROADCAST 0xFF + +#define PAACE_ATM_NO_XLATE 0x00 +#define PAACE_ATM_WINDOW_XLATE 0x01 +#define PAACE_ATM_PAGE_XLATE 0x02 +#define PAACE_ATM_WIN_PG_XLATE \ + (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) +#define PAACE_OTM_NO_XLATE 0x00 +#define PAACE_OTM_IMMEDIATE 0x01 +#define PAACE_OTM_INDEXED 0x02 +#define PAACE_OTM_RESERVED 0x03 + +#define PAACE_M_COHERENCE_REQ 0x01 + +#define PAACE_PID_0 0x0 +#define PAACE_PID_1 0x1 +#define PAACE_PID_2 0x2 +#define PAACE_PID_3 0x3 +#define PAACE_PID_4 0x4 +#define PAACE_PID_5 0x5 +#define PAACE_PID_6 0x6 +#define PAACE_PID_7 0x7 + +#define PAACE_TCEF_FORMAT0_8B 0x00 +#define PAACE_TCEF_FORMAT1_RSVD 0x01 +/* + * Hard coded value for the PAACT size to accomodate + * maximum LIODN value generated by u-boot. 
+ */ +#define PAACE_NUMBER_ENTRIES 0x500 +/* Hard coded value for the SPAACT size */ +#define SPAACE_NUMBER_ENTRIES 0x800 + +#define OME_NUMBER_ENTRIES 16 + +/* PAACE Bit Field Defines */ +#define PPAACE_AF_WBAL 0xfffff000 +#define PPAACE_AF_WBAL_SHIFT 12 +#define PPAACE_AF_WSE 0x00000fc0 +#define PPAACE_AF_WSE_SHIFT 6 +#define PPAACE_AF_MW 0x00000020 +#define PPAACE_AF_MW_SHIFT 5 + +#define SPAACE_AF_LIODN 0xffff0000 +#define SPAACE_AF_LIODN_SHIFT 16 + +#define PAACE_AF_AP 0x00000018 +#define PAACE_AF_AP_SHIFT 3 +#define PAACE_AF_DD 0x00000004 +#define PAACE_AF_DD_SHIFT 2 +#define PAACE_AF_PT 0x00000002 +#define PAACE_AF_PT_SHIFT 1 +#define PAACE_AF_V 0x00000001 +#define PAACE_AF_V_SHIFT 0 + +#define PAACE_DA_HOST_CR 0x80 +#define PAACE_DA_HOST_CR_SHIFT 7 + +#define PAACE_IA_CID 0x00FF0000 +#define PAACE_IA_CID_SHIFT 16 +#define PAACE_IA_WCE 0x000000F0 +#define PAACE_IA_WCE_SHIFT 4 +#define PAACE_IA_ATM 0x0000000C +#define PAACE_IA_ATM_SHIFT 2 +#define PAACE_IA_OTM 0x00000003 +#define PAACE_IA_OTM_SHIFT 0 + +#define PAACE_WIN_TWBAL 0xfffff000 +#define PAACE_WIN_TWBAL_SHIFT 12 +#define PAACE_WIN_SWSE 0x00000fc0 +#define PAACE_WIN_SWSE_SHIFT 6 + +/* PAMU Data Structures */ +/* primary / secondary paact structure */ +struct paace { + /* PAACE Offset 0x00 */ + u32 wbah; /* only valid for Primary PAACE */ + u32 addr_bitfields; /* See P/S PAACE_AF_* */ + + /* PAACE Offset 0x08 */ + /* Interpretation of first 32 bits dependent on DD above */ + union { + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + /* Partition ID */ + u8 pid; + /* Snoop ID */ + u8 snpid; + /* coherency_required : 1 reserved : 7 */ + u8 coherency_required; /* See PAACE_DA_* */ + } to_host; + struct { + /* Destination ID, see PAACE_DID_* defines */ + u8 did; + u8 reserved1; + u16 reserved2; + } to_io; + } domain_attr; + + /* Implementation attributes + window count + address & operation translation modes */ + u32 impl_attr; /* See PAACE_IA_* */ + + /* PAACE Offset 0x10 */ + /* 
Translated window base address */ + u32 twbah; + u32 win_bitfields; /* See PAACE_WIN_* */ + + /* PAACE Offset 0x18 */ + /* first secondary paace entry */ + u32 fspi; /* only valid for Primary PAACE */ + union { + struct { + u8 ioea; + u8 moea; + u8 ioeb; + u8 moeb; + } immed_ot; + struct { + u16 reserved; + u16 omi; + } index_ot; + } op_encode; + + /* PAACE Offsets 0x20-0x38 */ + u32 reserved[8]; /* not currently implemented */ +}; + +/* OME : Operation mapping entry + * MOE : Mapped Operation Encodings + * The operation mapping table is table containing operation mapping entries (OME). + * The index of a particular OME is programmed in the PAACE entry for translation + * in bound I/O operations corresponding to an LIODN. The OMT is used for translation + * specifically in case of the indexed translation mode. Each OME contains a 128 + * byte mapped operation encoding (MOE), where each byte represents an MOE. + */ +#define NUM_MOE 128 +struct ome { + u8 moe[NUM_MOE]; +} __attribute__((packed)); + +#define PAACT_SIZE (sizeof(struct paace) * PAACE_NUMBER_ENTRIES) +#define SPAACT_SIZE (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES) +#define OMT_SIZE (sizeof(struct ome) * OME_NUMBER_ENTRIES) + +#define PAMU_PAGE_SHIFT 12 +#define PAMU_PAGE_SIZE 4096ULL + +#define IOE_READ 0x00 +#define IOE_READ_IDX 0x00 +#define IOE_WRITE 0x81 +#define IOE_WRITE_IDX 0x01 +#define IOE_EREAD0 0x82 /* Enhanced read type 0 */ +#define IOE_EREAD0_IDX 0x02 /* Enhanced read type 0 */ +#define IOE_EWRITE0 0x83 /* Enhanced write type 0 */ +#define IOE_EWRITE0_IDX 0x03 /* Enhanced write type 0 */ +#define IOE_DIRECT0 0x84 /* Directive type 0 */ +#define IOE_DIRECT0_IDX 0x04 /* Directive type 0 */ +#define IOE_EREAD1 0x85 /* Enhanced read type 1 */ +#define IOE_EREAD1_IDX 0x05 /* Enhanced read type 1 */ +#define IOE_EWRITE1 0x86 /* Enhanced write type 1 */ +#define IOE_EWRITE1_IDX 0x06 /* Enhanced write type 1 */ +#define IOE_DIRECT1 0x87 /* Directive type 1 */ +#define IOE_DIRECT1_IDX 0x07 /* 
Directive type 1 */ +#define IOE_RAC 0x8c /* Read with Atomic clear */ +#define IOE_RAC_IDX 0x0c /* Read with Atomic clear */ +#define IOE_RAS 0x8d /* Read with Atomic set */ +#define IOE_RAS_IDX 0x0d /* Read with Atomic set */ +#define IOE_RAD 0x8e /* Read with Atomic decrement */ +#define IOE_RAD_IDX 0x0e /* Read with Atomic decrement */ +#define IOE_RAI 0x8f /* Read with Atomic increment */ +#define IOE_RAI_IDX 0x0f /* Read with Atomic increment */ + +#define EOE_READ 0x00 +#define EOE_WRITE 0x01 +#define EOE_RAC 0x0c /* Read with Atomic clear */ +#define EOE_RAS 0x0d /* Read with Atomic set */ +#define EOE_RAD 0x0e /* Read with Atomic decrement */ +#define EOE_RAI 0x0f /* Read with Atomic increment */ +#define EOE_LDEC 0x10 /* Load external cache */ +#define EOE_LDECL 0x11 /* Load external cache with stash lock */ +#define EOE_LDECPE 0x12 /* Load external cache with preferred exclusive */ +#define EOE_LDECPEL 0x13 /* Load external cache with preferred exclusive and lock */ +#define EOE_LDECFE 0x14 /* Load external cache with forced exclusive */ +#define EOE_LDECFEL 0x15 /* Load external cache with forced exclusive and lock */ +#define EOE_RSA 0x16 /* Read with stash allocate */ +#define EOE_RSAU 0x17 /* Read with stash allocate and unlock */ +#define EOE_READI 0x18 /* Read with invalidate */ +#define EOE_RWNITC 0x19 /* Read with no intention to cache */ +#define EOE_WCI 0x1a /* Write cache inhibited */ +#define EOE_WWSA 0x1b /* Write with stash allocate */ +#define EOE_WWSAL 0x1c /* Write with stash allocate and lock */ +#define EOE_WWSAO 0x1d /* Write with stash allocate only */ +#define EOE_WWSAOL 0x1e /* Write with stash allocate only and lock */ +#define EOE_VALID 0x80 + +/* Function prototypes */ +int pamu_domain_init(void); +int pamu_enable_liodn(int liodn); +int pamu_disable_liodn(int liodn); +void pamu_free_subwins(int liodn); +int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, + u32 omi, unsigned long rpn, u32 snoopid, 
uint32_t stashid, + u32 subwin_cnt, int prot); +int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr, + phys_addr_t subwin_size, u32 omi, unsigned long rpn, + uint32_t snoopid, u32 stashid, int enable, int prot); + +u32 get_stash_id(u32 stash_dest_hint, u32 vcpu); +void get_ome_index(u32 *omi_index, struct device *dev); +int pamu_update_paace_stash(int liodn, u32 subwin, u32 value); +int pamu_disable_spaace(int liodn, u32 subwin); +u32 pamu_get_max_subwin_cnt(void); + +#endif /* __FSL_PAMU_H */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c new file mode 100644 index 000000000000..c857c30da979 --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.c @@ -0,0 +1,1172 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * Author: Varun Sethi <varun.sethi@freescale.com> + * + */ + +#define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/iommu.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include <linux/of_platform.h> +#include <linux/bootmem.h> +#include <linux/err.h> +#include <asm/io.h> +#include <asm/bitops.h> + +#include <asm/pci-bridge.h> +#include <sysdev/fsl_pci.h> + +#include "fsl_pamu_domain.h" +#include "pci.h" + +/* + * Global spinlock that needs to be held while + * configuring PAMU. + */ +static DEFINE_SPINLOCK(iommu_lock); + +static struct kmem_cache *fsl_pamu_domain_cache; +static struct kmem_cache *iommu_devinfo_cache; +static DEFINE_SPINLOCK(device_domain_lock); + +static int __init iommu_init_mempool(void) +{ + + fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain", + sizeof(struct fsl_dma_domain), + 0, + SLAB_HWCACHE_ALIGN, + + NULL); + if (!fsl_pamu_domain_cache) { + pr_debug("Couldn't create fsl iommu_domain cache\n"); + return -ENOMEM; + } + + iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_devinfo_cache) { + pr_debug("Couldn't create devinfo cache\n"); + kmem_cache_destroy(fsl_pamu_domain_cache); + return -ENOMEM; + } + + return 0; +} + +static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) +{ + u32 win_cnt = dma_domain->win_cnt; + struct dma_window *win_ptr = + &dma_domain->win_arr[0]; + struct iommu_domain_geometry *geom; + + geom = &dma_domain->iommu_domain->geometry; + + if (!win_cnt || !dma_domain->geom_size) { + pr_debug("Number of windows/geometry not configured for the domain\n"); + return 0; + } + + if (win_cnt > 1) { + u64 subwin_size; + dma_addr_t subwin_iova; + u32 wnd; + + subwin_size = dma_domain->geom_size >> 
ilog2(win_cnt); + subwin_iova = iova & ~(subwin_size - 1); + wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size); + win_ptr = &dma_domain->win_arr[wnd]; + } + + if (win_ptr->valid) + return (win_ptr->paddr + (iova & (win_ptr->size - 1))); + + return 0; +} + +static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) +{ + struct dma_window *sub_win_ptr = + &dma_domain->win_arr[0]; + int i, ret; + unsigned long rpn, flags; + + for (i = 0; i < dma_domain->win_cnt; i++) { + if (sub_win_ptr[i].valid) { + rpn = sub_win_ptr[i].paddr >> + PAMU_PAGE_SHIFT; + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, + sub_win_ptr[i].size, + ~(u32)0, + rpn, + dma_domain->snoop_id, + dma_domain->stash_id, + (i > 0) ? 1 : 0, + sub_win_ptr[i].prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", + liodn); + return ret; + } + } + } + + return ret; +} + +static int map_win(int liodn, struct fsl_dma_domain *dma_domain) +{ + int ret; + struct dma_window *wnd = &dma_domain->win_arr[0]; + phys_addr_t wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) + pr_debug("PAMU PAACE configuration failed for liodn %d\n", + liodn); + + return ret; +} + +/* Map the DMA window corresponding to the LIODN */ +static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) +{ + if (dma_domain->win_cnt > 1) + return map_subwins(liodn, dma_domain); + else + return map_win(liodn, dma_domain); + +} + +/* Update window/subwindow mapping for the LIODN */ +static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + int ret; + struct dma_window *wnd = 
&dma_domain->win_arr[wnd_nr]; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (dma_domain->win_cnt > 1) { + ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, + dma_domain->stash_id, + (wnd_nr > 0) ? 1 : 0, + wnd->prot); + if (ret) + pr_debug("Subwindow reconfiguration failed for liodn %d\n", liodn); + } else { + phys_addr_t wnd_addr; + + wnd_addr = dma_domain->iommu_domain->geometry.aperture_start; + + ret = pamu_config_ppaace(liodn, wnd_addr, + wnd->size, + ~(u32)0, + wnd->paddr >> PAMU_PAGE_SHIFT, + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); + if (ret) + pr_debug("Window reconfiguration failed for liodn %d\n", liodn); + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, + u32 val) +{ + int ret = 0, i; + unsigned long flags; + + spin_lock_irqsave(&iommu_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Windows not configured, stash destination update failed for liodn %d\n", liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return -EINVAL; + } + + for (i = 0; i < dma_domain->win_cnt; i++) { + ret = pamu_update_paace_stash(liodn, i, val); + if (ret) { + pr_debug("Failed to update SPAACE %d field for liodn %d\n ", i, liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + return ret; + } + } + + spin_unlock_irqrestore(&iommu_lock, flags); + + return ret; +} + +/* Set the geometry parameters for a LIODN */ +static int pamu_set_liodn(int liodn, struct device *dev, + struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + phys_addr_t window_addr, window_size; + phys_addr_t subwin_size; + int ret = 0, i; + u32 omi_index = ~(u32)0; + unsigned long flags; + + /* + * Configure the omi_index at the geometry setup time. 
+ * This is a static value which depends on the type of + * device and would not change thereafter. + */ + get_ome_index(&omi_index, dev); + + window_addr = geom_attr->aperture_start; + window_size = dma_domain->geom_size; + + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_liodn(liodn); + if (!ret) + ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, win_cnt, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt); + return ret; + } + + if (win_cnt > 1) { + subwin_size = window_size >> ilog2(win_cnt); + for (i = 0; i < win_cnt; i++) { + spin_lock_irqsave(&iommu_lock, flags); + ret = pamu_disable_spaace(liodn, i); + if (!ret) + ret = pamu_config_spaace(liodn, win_cnt, i, + subwin_size, omi_index, + 0, dma_domain->snoop_id, + dma_domain->stash_id, + 0, 0); + spin_unlock_irqrestore(&iommu_lock, flags); + if (ret) { + pr_debug("PAMU SPAACE configuration failed for liodn %d\n", liodn); + return ret; + } + } + } + + return ret; +} + +static int check_size(u64 size, dma_addr_t iova) +{ + /* + * Size must be a power of two and at least be equal + * to PAMU page size. 
+ */ + if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { + pr_debug("%s: size too small or not a power of two\n", __func__); + return -EINVAL; + } + + /* iova must be page size aligned*/ + if (iova & (size - 1)) { + pr_debug("%s: address is not aligned with window size\n", __func__); + return -EINVAL; + } + + return 0; +} + +static struct fsl_dma_domain *iommu_alloc_dma_domain(void) +{ + struct fsl_dma_domain *domain; + + domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL); + if (!domain) + return NULL; + + domain->stash_id = ~(u32)0; + domain->snoop_id = ~(u32)0; + domain->win_cnt = pamu_get_max_subwin_cnt(); + domain->geom_size = 0; + + INIT_LIST_HEAD(&domain->devices); + + spin_lock_init(&domain->domain_lock); + + return domain; +} + +static inline struct device_domain_info *find_domain(struct device *dev) +{ + return dev->archdata.iommu_domain; +} + +static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) +{ + unsigned long flags; + + list_del(&info->link); + spin_lock_irqsave(&iommu_lock, flags); + if (win_cnt > 1) + pamu_free_subwins(info->liodn); + pamu_disable_liodn(info->liodn); + spin_unlock_irqrestore(&iommu_lock, flags); + spin_lock_irqsave(&device_domain_lock, flags); + info->dev->archdata.iommu_domain = NULL; + kmem_cache_free(iommu_devinfo_cache, info); + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain) +{ + struct device_domain_info *info, *tmp; + unsigned long flags; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Remove the device from the domain device list */ + list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) { + if (!dev || (info->dev == dev)) + remove_device_ref(info, dma_domain->win_cnt); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); +} + +static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev) +{ + struct device_domain_info *info, 
*old_domain_info; + unsigned long flags; + + spin_lock_irqsave(&device_domain_lock, flags); + /* + * Check here if the device is already attached to domain or not. + * If the device is already attached to a domain detach it. + */ + old_domain_info = find_domain(dev); + if (old_domain_info && old_domain_info->domain != dma_domain) { + spin_unlock_irqrestore(&device_domain_lock, flags); + detach_device(dev, old_domain_info->domain); + spin_lock_irqsave(&device_domain_lock, flags); + } + + info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC); + + info->dev = dev; + info->liodn = liodn; + info->domain = dma_domain; + + list_add(&info->link, &dma_domain->devices); + /* + * In case of devices with multiple LIODNs just store + * the info for the first LIODN as all + * LIODNs share the same domain + */ + if (!old_domain_info) + dev->archdata.iommu_domain = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + +} + +static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + if ((iova < domain->geometry.aperture_start) || + iova > (domain->geometry.aperture_end)) + return 0; + + return get_phys_addr(dma_domain, iova); +} + +static int fsl_pamu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return cap == IOMMU_CAP_CACHE_COHERENCY; +} + +static void fsl_pamu_domain_destroy(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + domain->priv = NULL; + + /* remove all the devices from the device list */ + detach_device(NULL, dma_domain); + + dma_domain->enabled = 0; + dma_domain->mapped = 0; + + kmem_cache_free(fsl_pamu_domain_cache, dma_domain); +} + +static int fsl_pamu_domain_init(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain; + + dma_domain = iommu_alloc_dma_domain(); + if (!dma_domain) { + pr_debug("dma_domain allocation failed\n"); + return -ENOMEM; + } + domain->priv = dma_domain; + 
dma_domain->iommu_domain = domain; + /* defaul geometry 64 GB i.e. maximum system address */ + domain->geometry.aperture_start = 0; + domain->geometry.aperture_end = (1ULL << 36) - 1; + domain->geometry.force_aperture = true; + + return 0; +} + +/* Configure geometry settings for all LIODNs associated with domain */ +static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, + geom_attr, win_cnt); + if (ret) + break; + } + + return ret; +} + +/* Update stash destination for all LIODNs associated with the domain */ +static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn_stash(info->liodn, dma_domain, val); + if (ret) + break; + } + + return ret; +} + +/* Update domain mappings for all LIODNs associated with the domain */ +static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + ret = update_liodn(info->liodn, dma_domain, wnd_nr); + if (ret) + break; + } + return ret; +} + +static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, &dma_domain->devices, link) { + if (dma_domain->win_cnt == 1 && dma_domain->enabled) { + ret = pamu_disable_liodn(info->liodn); + if (!ret) + dma_domain->enabled = 0; + } else { + ret = pamu_disable_spaace(info->liodn, wnd_nr); + } + } + + return ret; +} + +static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int 
ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return; + } + + if (dma_domain->win_arr[wnd_nr].valid) { + ret = disable_domain_win(dma_domain, wnd_nr); + if (!ret) { + dma_domain->win_arr[wnd_nr].valid = 0; + dma_domain->mapped--; + } + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + +} + +static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, + phys_addr_t paddr, u64 size, int prot) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + struct dma_window *wnd; + int pamu_prot = 0; + int ret; + unsigned long flags; + u64 win_size; + + if (prot & IOMMU_READ) + pamu_prot |= PAACE_AP_PERMS_QUERY; + if (prot & IOMMU_WRITE) + pamu_prot |= PAACE_AP_PERMS_UPDATE; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (!dma_domain->win_arr) { + pr_debug("Number of windows not configured\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + if (wnd_nr >= dma_domain->win_cnt) { + pr_debug("Invalid window index\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); + if (size > win_size) { + pr_debug("Invalid window size \n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + if (dma_domain->win_cnt == 1) { + if (dma_domain->enabled) { + pr_debug("Disable the window before updating the mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + ret = check_size(size, domain->geometry.aperture_start); + if (ret) { + pr_debug("Aperture start not aligned to the size\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + 
return -EINVAL; + } + } + + wnd = &dma_domain->win_arr[wnd_nr]; + if (!wnd->valid) { + wnd->paddr = paddr; + wnd->size = size; + wnd->prot = pamu_prot; + + ret = update_domain_mapping(dma_domain, wnd_nr); + if (!ret) { + wnd->valid = 1; + dma_domain->mapped++; + } + } else { + pr_debug("Disable the window before updating the mapping\n"); + ret = -EBUSY; + } + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +/* + * Attach the LIODN to the DMA domain and configure the geometry + * and window mappings. + */ +static int handle_attach_device(struct fsl_dma_domain *dma_domain, + struct device *dev, const u32 *liodn, + int num) +{ + unsigned long flags; + struct iommu_domain *domain = dma_domain->iommu_domain; + int ret = 0; + int i; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + for (i = 0; i < num; i++) { + + /* Ensure that LIODN value is valid */ + if (liodn[i] >= PAACE_NUMBER_ENTRIES) { + pr_debug("Invalid liodn %d, attach device failed for %s\n", + liodn[i], dev->of_node->full_name); + ret = -EINVAL; + break; + } + + attach_device(dma_domain, liodn[i], dev); + /* + * Check if geometry has already been configured + * for the domain. If yes, set the geometry for + * the LIODN. + */ + if (dma_domain->win_arr) { + u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; + ret = pamu_set_liodn(liodn[i], dev, dma_domain, + &domain->geometry, + win_cnt); + if (ret) + break; + if (dma_domain->mapped) { + /* + * Create window/subwindow mapping for + * the LIODN. 
+ */ + ret = map_liodn(liodn[i], dma_domain); + if (ret) + break; + } + } + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static int fsl_pamu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *liodn; + u32 liodn_cnt; + int len, ret = 0; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while attaching a + * PCI device. + */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. + */ + dev = pci_ctl->parent; + } + + liodn = of_get_property(dev->of_node, "fsl,liodn", &len); + if (liodn) { + liodn_cnt = len / sizeof(u32); + ret = handle_attach_device(dma_domain, dev, + liodn, liodn_cnt); + } else { + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); + ret = -EINVAL; + } + + return ret; +} + +static void fsl_pamu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + const u32 *prop; + int len; + struct pci_dev *pdev = NULL; + struct pci_controller *pci_ctl; + + /* + * Use LIODN of the PCI controller while detaching a + * PCI device. + */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + pci_ctl = pci_bus_to_host(pdev->bus); + /* + * make dev point to pci controller device + * so we can get the LIODN programmed by + * u-boot. 
+ */ + dev = pci_ctl->parent; + } + + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + detach_device(dev, dma_domain); + else + pr_debug("missing fsl,liodn property at %s\n", + dev->of_node->full_name); +} + +static int configure_domain_geometry(struct iommu_domain *domain, void *data) +{ + struct iommu_domain_geometry *geom_attr = data; + struct fsl_dma_domain *dma_domain = domain->priv; + dma_addr_t geom_size; + unsigned long flags; + + geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1; + /* + * Sanity check the geometry size. Also, we do not support + * DMA outside of the geometry. + */ + if (check_size(geom_size, geom_attr->aperture_start) || + !geom_attr->force_aperture) { + pr_debug("Invalid PAMU geometry attributes\n"); + return -EINVAL; + } + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Copy the domain geometry information */ + memcpy(&domain->geometry, geom_attr, + sizeof(struct iommu_domain_geometry)); + dma_domain->geom_size = geom_size; + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +/* Set the domain stash attribute */ +static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) +{ + struct pamu_stash_attribute *stash_attr = data; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + memcpy(&dma_domain->dma_stash, stash_attr, + sizeof(struct pamu_stash_attribute)); + + dma_domain->stash_id = get_stash_id(stash_attr->cache, + stash_attr->cpu); + if (dma_domain->stash_id == ~(u32)0) { + pr_debug("Invalid stash attributes\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = update_domain_stash(dma_domain, dma_domain->stash_id); + + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + 
return ret; +} + +/* Configure domain dma state i.e. enable/disable DMA*/ +static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) +{ + struct device_domain_info *info; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + + if (enable && !dma_domain->mapped) { + pr_debug("Can't enable DMA domain without valid mapping\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENODEV; + } + + dma_domain->enabled = enable; + list_for_each_entry(info, &dma_domain->devices, + link) { + ret = (enable) ? pamu_enable_liodn(info->liodn) : + pamu_disable_liodn(info->liodn); + if (ret) + pr_debug("Unable to set dma state for liodn %d", + info->liodn); + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return 0; +} + +static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_GEOMETRY: + ret = configure_domain_geometry(domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_STASH: + ret = configure_domain_stash(dma_domain, data); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + ret = configure_domain_dma_state(dma_domain, *(int *)data); + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = -EINVAL; + break; + }; + + return ret; +} + +static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, + enum iommu_attr attr_type, void *data) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + int ret = 0; + + + switch (attr_type) { + case DOMAIN_ATTR_FSL_PAMU_STASH: + memcpy((struct pamu_stash_attribute *) data, &dma_domain->dma_stash, + sizeof(struct pamu_stash_attribute)); + break; + case DOMAIN_ATTR_FSL_PAMU_ENABLE: + *(int *)data = dma_domain->enabled; + break; + case DOMAIN_ATTR_FSL_PAMUV1: + *(int *)data = DOMAIN_ATTR_FSL_PAMUV1; + break; + default: + pr_debug("Unsupported attribute type\n"); + ret = 
-EINVAL; + break; + }; + + return ret; +} + +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +static struct iommu_group *get_device_iommu_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + group = iommu_group_alloc(); + + return group; +} + +static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) +{ + u32 version; + + /* Check the PCI controller version number by readding BRR1 register */ + version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2)); + version &= PCI_FSL_BRR1_VER; + /* If PCI controller version is >= 0x204 we can partition endpoints*/ + if (version >= 0x204) + return 1; + + return 0; +} + +/* Get iommu group information from peer devices or devices on the parent bus */ +static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) +{ + struct pci_dev *tmp; + struct iommu_group *group; + struct pci_bus *bus = pdev->bus; + + /* + * Traverese the pci bus device list to get + * the shared iommu group. 
+ */ + while (bus) { + list_for_each_entry(tmp, &bus->devices, bus_list) { + if (tmp == pdev) + continue; + group = iommu_group_get(&tmp->dev); + if (group) + return group; + } + + bus = bus->parent; + } + + return NULL; +} + +static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) +{ + struct pci_controller *pci_ctl; + bool pci_endpt_partioning; + struct iommu_group *group = NULL; + struct pci_dev *bridge, *dma_pdev = NULL; + + pci_ctl = pci_bus_to_host(pdev->bus); + pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); + /* We can partition PCIe devices so assign device group to the device */ + if (pci_endpt_partioning) { + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) + dma_pdev = pci_get_domain_bus_and_slot( + pci_domain_nr(pdev->bus), + bridge->subordinate->number, 0); + if (!dma_pdev) + dma_pdev = pci_dev_get(bridge); + } else + dma_pdev = pci_dev_get(pdev); + + /* Account for quirked devices */ + swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + + /* + * If it's a multifunction device that does not support our + * required ACS flags, add to the same group as lowest numbered + * function that also does not suport the required ACS flags. + */ + if (dma_pdev->multifunction && + !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { + u8 i, slot = PCI_SLOT(dma_pdev->devfn); + + for (i = 0; i < 8; i++) { + struct pci_dev *tmp; + + tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); + if (!tmp) + continue; + + if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { + swap_pci_ref(&dma_pdev, tmp); + break; + } + pci_dev_put(tmp); + } + } + + /* + * Devices on the root bus go through the iommu. If that's not us, + * find the next upstream device and test ACS up to the root bus. + * Finding the next device may require skipping virtual buses. 
+ */ + while (!pci_is_root_bus(dma_pdev->bus)) { + struct pci_bus *bus = dma_pdev->bus; + + while (!bus->self) { + if (!pci_is_root_bus(bus)) + bus = bus->parent; + else + goto root_bus; + } + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); + } + +root_bus: + group = get_device_iommu_group(&dma_pdev->dev); + pci_dev_put(dma_pdev); + /* + * PCIe controller is not a paritionable entity + * free the controller device iommu_group. + */ + if (pci_ctl->parent->iommu_group) + iommu_group_remove_device(pci_ctl->parent); + } else { + /* + * All devices connected to the controller will share the + * PCI controllers device group. If this is the first + * device to be probed for the pci controller, copy the + * device group information from the PCI controller device + * node and remove the PCI controller iommu group. + * For subsequent devices, the iommu group information can + * be obtained from sibling devices (i.e. from the bus_devices + * link list). + */ + if (pci_ctl->parent->iommu_group) { + group = get_device_iommu_group(pci_ctl->parent); + iommu_group_remove_device(pci_ctl->parent); + } else + group = get_shared_pci_device_group(pdev); + } + + return group; +} + +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group = NULL; + struct pci_dev *pdev; + const u32 *prop; + int ret, len; + + /* + * For platform devices we allocate a separate group for + * each of the devices. 
+ */ + if (dev->bus == &pci_bus_type) { + pdev = to_pci_dev(dev); + /* Don't create device groups for virtual PCI bridges */ + if (pdev->subordinate) + return 0; + + group = get_pci_device_group(pdev); + + } else { + prop = of_get_property(dev->of_node, "fsl,liodn", &len); + if (prop) + group = get_device_iommu_group(dev); + } + + if (!group || IS_ERR(group)) + return PTR_ERR(group); + + ret = iommu_group_add_device(group, dev); + + iommu_group_put(group); + return ret; +} + +static void fsl_pamu_remove_device(struct device *dev) +{ + iommu_group_remove_device(dev); +} + +static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + unsigned long flags; + int ret; + + spin_lock_irqsave(&dma_domain->domain_lock, flags); + /* Ensure domain is inactive i.e. DMA should be disabled for the domain */ + if (dma_domain->enabled) { + pr_debug("Can't set geometry attributes as domain is active\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EBUSY; + } + + /* Ensure that the geometry has been set for the domain */ + if (!dma_domain->geom_size) { + pr_debug("Please configure geometry before setting the number of windows\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + /* + * Ensure we have valid window count i.e. it should be less than + * maximum permissible limit and should be a power of two. + */ + if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) { + pr_debug("Invalid window count\n"); + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -EINVAL; + } + + ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, + ((w_count > 1) ? 
w_count : 0)); + if (!ret) { + if (dma_domain->win_arr) + kfree(dma_domain->win_arr); + dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * + w_count, GFP_ATOMIC); + if (!dma_domain->win_arr) { + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + return -ENOMEM; + } + dma_domain->win_cnt = w_count; + } + spin_unlock_irqrestore(&dma_domain->domain_lock, flags); + + return ret; +} + +static u32 fsl_pamu_get_windows(struct iommu_domain *domain) +{ + struct fsl_dma_domain *dma_domain = domain->priv; + + return dma_domain->win_cnt; +} + +static struct iommu_ops fsl_pamu_ops = { + .domain_init = fsl_pamu_domain_init, + .domain_destroy = fsl_pamu_domain_destroy, + .attach_dev = fsl_pamu_attach_device, + .detach_dev = fsl_pamu_detach_device, + .domain_window_enable = fsl_pamu_window_enable, + .domain_window_disable = fsl_pamu_window_disable, + .domain_get_windows = fsl_pamu_get_windows, + .domain_set_windows = fsl_pamu_set_windows, + .iova_to_phys = fsl_pamu_iova_to_phys, + .domain_has_cap = fsl_pamu_domain_has_cap, + .domain_set_attr = fsl_pamu_set_domain_attr, + .domain_get_attr = fsl_pamu_get_domain_attr, + .add_device = fsl_pamu_add_device, + .remove_device = fsl_pamu_remove_device, +}; + +int pamu_domain_init() +{ + int ret = 0; + + ret = iommu_init_mempool(); + if (ret) + return ret; + + bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); + bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); + + return ret; +} diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h new file mode 100644 index 000000000000..c90293f99709 --- /dev/null +++ b/drivers/iommu/fsl_pamu_domain.h @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Freescale Semiconductor, Inc. + * + */ + +#ifndef __FSL_PAMU_DOMAIN_H +#define __FSL_PAMU_DOMAIN_H + +#include "fsl_pamu.h" + +struct dma_window { + phys_addr_t paddr; + u64 size; + int valid; + int prot; +}; + +struct fsl_dma_domain { + /* + * Indicates the geometry size for the domain. + * This would be set when the geometry is + * configured for the domain. + */ + dma_addr_t geom_size; + /* + * Number of windows assocaited with this domain. + * During domain initialization, it is set to the + * the maximum number of subwindows allowed for a LIODN. + * Minimum value for this is 1 indicating a single PAMU + * window, without any sub windows. Value can be set/ + * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS. + * Value can only be set once the geometry has been configured. + */ + u32 win_cnt; + /* + * win_arr contains information of the configured + * windows for a domain. This is allocated only + * when the number of windows for the domain are + * set. + */ + struct dma_window *win_arr; + /* list of devices associated with the domain */ + struct list_head devices; + /* dma_domain states: + * mapped - A particular mapping has been created + * within the configured geometry. + * enabled - DMA has been enabled for the given + * domain. This translates to setting of the + * valid bit for the primary PAACE in the PAMU + * PAACT table. Domain geometry should be set and + * it must have a valid mapping before DMA can be + * enabled for it. 
+ * + */ + int mapped; + int enabled; + /* stash_id obtained from the stash attribute details */ + u32 stash_id; + struct pamu_stash_attribute dma_stash; + u32 snoop_id; + struct iommu_domain *iommu_domain; + spinlock_t domain_lock; +}; + +/* domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct device *dev; + u32 liodn; + struct fsl_dma_domain *domain; /* pointer to domain */ +}; +#endif /* __FSL_PAMU_DOMAIN_H */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index eec0d3e04bf5..15e9b57e9cf0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain, return order; } +static void dma_pte_free_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, unsigned long last_pfn) +{ + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + struct dma_pte *level_pte; + + if (!dma_pte_present(pte) || dma_pte_superpage(pte)) + goto next; + + level_pfn = pfn & level_mask(level - 1); + level_pte = phys_to_virt(dma_pte_addr(pte)); + + if (level > 2) + dma_pte_free_level(domain, level - 1, level_pte, + level_pfn, start_pfn, last_pfn); + + /* If range covers entire pagetable, free it */ + if (!(start_pfn > level_pfn || + last_pfn < level_pfn + level_size(level))) { + dma_clear_pte(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); + free_pgtable_page(level_pte); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); +} + /* free page table pages. 
last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - struct dma_pte *first_pte, *pte; - int total = agaw_to_level(domain->agaw); - int level; - unsigned long tmp; - int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ - level = 2; - while (level <= total) { - tmp = align_to_level(start_pfn, level); - - /* If we can't even clear one PTE at this level, we're done */ - if (tmp + level_size(level) - 1 > last_pfn) - return; - - do { - large_page = level; - first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); - if (large_page > level) - level = large_page + 1; - if (!pte) { - tmp = align_to_level(tmp + 1, level + 1); - continue; - } - do { - if (dma_pte_present(pte)) { - free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - } - pte++; - tmp += level_size(level); - } while (!first_pte_in_page(pte) && - tmp + level_size(level) - 1 <= last_pfn); + dma_pte_free_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn); - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); - - } while (tmp && tmp + level_size(level) - 1 <= last_pfn); - level++; - } /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 074bcb3892b5..875bbe4c962e 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -194,11 +194,11 @@ config LEDS_LP3944 module will be called leds-lp3944. 
config LEDS_LP55XX_COMMON - tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562" - depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 + tristate "Common Driver for TI/National LP5521/5523/55231/5562/8501" + depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 || LEDS_LP8501 select FW_LOADER help - This option supports common operations for LP5521 and LP5523/55231 + This option supports common operations for LP5521/5523/55231/5562/8501 devices. config LEDS_LP5521 @@ -232,6 +232,18 @@ config LEDS_LP5562 Driver provides direct control via LED class and interface for programming the engines. +config LEDS_LP8501 + tristate "LED Support for TI LP8501 LED driver chip" + depends on LEDS_CLASS && I2C + select LEDS_LP55XX_COMMON + help + If you say yes here you get support for TI LP8501 LED driver. + It is 9 channel chip with programmable engines. + Driver provides direct control via LED class and interface for + programming the engines. + It is similar as LP5523, but output power selection is available. + And register layout and engine program schemes are different. + config LEDS_LP8788 tristate "LED support for the TI LP8788 PMIC" depends on LEDS_CLASS @@ -279,13 +291,14 @@ config LEDS_PCA955X LED driver chips accessed via the I2C bus. Supported devices include PCA9550, PCA9551, PCA9552, and PCA9553. -config LEDS_PCA9633 - tristate "LED support for PCA9633 I2C chip" +config LEDS_PCA963X + tristate "LED support for PCA963x I2C chip" depends on LEDS_CLASS depends on I2C help - This option enables support for LEDs connected to the PCA9633 - LED driver chip accessed via the I2C bus. + This option enables support for LEDs connected to the PCA963x + LED driver chip accessed via the I2C bus. 
Supported + devices include PCA9633 and PCA9634 config LEDS_WM831X_STATUS tristate "LED support for status LEDs on WM831x PMICs" @@ -398,10 +411,7 @@ config LEDS_MC13783 config LEDS_NS2 tristate "LED support for Network Space v2 GPIO LEDs" depends on LEDS_CLASS - depends on MACH_NETSPACE_V2 || MACH_INETSPACE_V2 || \ - MACH_NETSPACE_MAX_V2 || MACH_D2NET_V2 || \ - MACH_NETSPACE_V2_DT || MACH_INETSPACE_V2_DT || \ - MACH_NETSPACE_MAX_V2_DT || MACH_NETSPACE_MINI_V2_DT + depends on ARCH_KIRKWOOD default y help This option enable support for the dual-GPIO LED found on the @@ -410,8 +420,8 @@ config LEDS_NS2 config LEDS_NETXBIG tristate "LED support for Big Network series LEDs" - depends on MACH_NET2BIG_V2 || MACH_NET5BIG_V2 depends on LEDS_CLASS + depends on ARCH_KIRKWOOD default y help This option enable support for LEDs found on the LaCie 2Big diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index ae4b6135f665..8979b0b2c85e 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_LEDS_LP55XX_COMMON) += leds-lp55xx-common.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o obj-$(CONFIG_LEDS_LP5523) += leds-lp5523.o obj-$(CONFIG_LEDS_LP5562) += leds-lp5562.o +obj-$(CONFIG_LEDS_LP8501) += leds-lp8501.o obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o @@ -34,7 +35,7 @@ obj-$(CONFIG_LEDS_HP6XX) += leds-hp6xx.o obj-$(CONFIG_LEDS_OT200) += leds-ot200.o obj-$(CONFIG_LEDS_FSG) += leds-fsg.o obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o -obj-$(CONFIG_LEDS_PCA9633) += leds-pca9633.o +obj-$(CONFIG_LEDS_PCA963X) += leds-pca963x.o obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o obj-$(CONFIG_LEDS_DA9052) += leds-da9052.o obj-$(CONFIG_LEDS_WM831X_STATUS) += leds-wm831x-status.o diff --git a/drivers/leds/leds-88pm860x.c b/drivers/leds/leds-88pm860x.c index 232b3ce902e5..5f588c0a376e 100644 --- a/drivers/leds/leds-88pm860x.c +++ b/drivers/leds/leds-88pm860x.c @@ 
-157,7 +157,7 @@ static int pm860x_led_dt_init(struct platform_device *pdev, static int pm860x_led_probe(struct platform_device *pdev) { struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); - struct pm860x_led_pdata *pdata = pdev->dev.platform_data; + struct pm860x_led_pdata *pdata = dev_get_platdata(&pdev->dev); struct pm860x_led *data; struct resource *res; int ret = 0; diff --git a/drivers/leds/leds-adp5520.c b/drivers/leds/leds-adp5520.c index e8072abe76e5..7e311a120b11 100644 --- a/drivers/leds/leds-adp5520.c +++ b/drivers/leds/leds-adp5520.c @@ -87,7 +87,7 @@ static int adp5520_led_setup(struct adp5520_led *led) static int adp5520_led_prepare(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device *dev = pdev->dev.parent; int ret = 0; @@ -103,7 +103,7 @@ static int adp5520_led_prepare(struct platform_device *pdev) static int adp5520_led_probe(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct adp5520_led *led, *led_dat; struct led_info *cur_led; int ret, i; @@ -185,7 +185,7 @@ err: static int adp5520_led_remove(struct platform_device *pdev) { - struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data; + struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); struct adp5520_led *led; int i; diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c index cf9efe421c2b..6de216a89a0c 100644 --- a/drivers/leds/leds-asic3.c +++ b/drivers/leds/leds-asic3.c @@ -94,7 +94,7 @@ static int blink_set(struct led_classdev *cdev, static int asic3_led_probe(struct platform_device *pdev) { - struct asic3_led *led = pdev->dev.platform_data; + struct asic3_led *led = dev_get_platdata(&pdev->dev); int ret; ret = mfd_cell_enable(pdev); @@ -127,7 +127,7 @@ out: static int 
asic3_led_remove(struct platform_device *pdev) { - struct asic3_led *led = pdev->dev.platform_data; + struct asic3_led *led = dev_get_platdata(&pdev->dev); led_classdev_unregister(led->cdev); diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c index 90518f84b9c0..56cec8d6a2ac 100644 --- a/drivers/leds/leds-atmel-pwm.c +++ b/drivers/leds/leds-atmel-pwm.c @@ -42,7 +42,7 @@ static int pwmled_probe(struct platform_device *pdev) int i; int status; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata || pdata->num_leds < 1) return -ENODEV; @@ -119,7 +119,7 @@ static int pwmled_remove(struct platform_device *pdev) struct pwmled *leds; unsigned i; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); leds = platform_get_drvdata(pdev); for (i = 0; i < pdata->num_leds; i++) { diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c index 2db04231a792..fb5a3472d614 100644 --- a/drivers/leds/leds-bd2802.c +++ b/drivers/leds/leds-bd2802.c @@ -684,7 +684,7 @@ static int bd2802_probe(struct i2c_client *client, } led->client = client; - pdata = led->pdata = client->dev.platform_data; + pdata = led->pdata = dev_get_platdata(&client->dev); i2c_set_clientdata(client, led); /* Configure RESET GPIO (L: RESET, H: RESET cancel) */ diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c index 6a8405df76a3..d93e2455da5c 100644 --- a/drivers/leds/leds-clevo-mail.c +++ b/drivers/leds/leds-clevo-mail.c @@ -40,7 +40,7 @@ static int __init clevo_mail_led_dmi_callback(const struct dmi_system_id *id) * detected as working, but in reality it is not) as low as * possible. 
*/ -static struct dmi_system_id __initdata clevo_mail_led_dmi_table[] = { +static struct dmi_system_id clevo_mail_led_dmi_table[] __initdata = { { .callback = clevo_mail_led_dmi_callback, .ident = "Clevo D410J", diff --git a/drivers/leds/leds-da903x.c b/drivers/leds/leds-da903x.c index c263a21db829..2a4b87f8091a 100644 --- a/drivers/leds/leds-da903x.c +++ b/drivers/leds/leds-da903x.c @@ -93,7 +93,7 @@ static void da903x_led_set(struct led_classdev *led_cdev, static int da903x_led_probe(struct platform_device *pdev) { - struct led_info *pdata = pdev->dev.platform_data; + struct led_info *pdata = dev_get_platdata(&pdev->dev); struct da903x_led *led; int id, ret; diff --git a/drivers/leds/leds-da9052.c b/drivers/leds/leds-da9052.c index efec43344e9f..865d4faf874a 100644 --- a/drivers/leds/leds-da9052.c +++ b/drivers/leds/leds-da9052.c @@ -112,7 +112,7 @@ static int da9052_led_probe(struct platform_device *pdev) int i; da9052 = dev_get_drvdata(pdev->dev.parent); - pdata = da9052->dev->platform_data; + pdata = dev_get_platdata(da9052->dev); if (pdata == NULL) { dev_err(&pdev->dev, "No platform data\n"); goto err; @@ -185,7 +185,7 @@ static int da9052_led_remove(struct platform_device *pdev) int i; da9052 = dev_get_drvdata(pdev->dev.parent); - pdata = da9052->dev->platform_data; + pdata = dev_get_platdata(da9052->dev); pled = pdata->pled; for (i = 0; i < pled->num_leds; i++) { diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 84d74c373cae..e8b01e57348d 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -233,7 +233,7 @@ static struct gpio_leds_priv *gpio_leds_create_of(struct platform_device *pdev) static int gpio_led_probe(struct platform_device *pdev) { - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct gpio_leds_priv *priv; int i, ret = 0; diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index 
a036a19040fe..652368c2ea9a 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -403,7 +403,7 @@ static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set); static int lm3530_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm3530_platform_data *pdata = client->dev.platform_data; + struct lm3530_platform_data *pdata = dev_get_platdata(&client->dev); struct lm3530_data *drvdata; int err = 0; diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c index bbf24d038a7f..027ede73b80d 100644 --- a/drivers/leds/leds-lm3533.c +++ b/drivers/leds/leds-lm3533.c @@ -671,7 +671,7 @@ static int lm3533_led_probe(struct platform_device *pdev) if (!lm3533) return -EINVAL; - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata) { dev_err(&pdev->dev, "no platform data\n"); return -EINVAL; diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c index d81a8e7afd6c..591eb5e58ae3 100644 --- a/drivers/leds/leds-lm355x.c +++ b/drivers/leds/leds-lm355x.c @@ -423,7 +423,7 @@ static const struct regmap_config lm355x_regmap = { static int lm355x_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm355x_platform_data *pdata = client->dev.platform_data; + struct lm355x_platform_data *pdata = dev_get_platdata(&client->dev); struct lm355x_chip_data *chip; int err; diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c index f361bbef2dec..ceb6b3cde6fe 100644 --- a/drivers/leds/leds-lm3642.c +++ b/drivers/leds/leds-lm3642.c @@ -316,7 +316,7 @@ static const struct regmap_config lm3642_regmap = { static int lm3642_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lm3642_platform_data *pdata = client->dev.platform_data; + struct lm3642_platform_data *pdata = dev_get_platdata(&client->dev); struct lm3642_chip_data *chip; int err; diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c index 0c4386e656c1..8e1abdcd4c9d 
100644 --- a/drivers/leds/leds-lp3944.c +++ b/drivers/leds/leds-lp3944.c @@ -289,7 +289,7 @@ static void lp3944_led_set_brightness(struct led_classdev *led_cdev, dev_dbg(&led->client->dev, "%s: %s, %d\n", __func__, led_cdev->name, brightness); - led->status = brightness; + led->status = !!brightness; schedule_work(&led->work); } @@ -377,7 +377,8 @@ exit: static int lp3944_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct lp3944_platform_data *lp3944_pdata = client->dev.platform_data; + struct lp3944_platform_data *lp3944_pdata = + dev_get_platdata(&client->dev); struct lp3944_data *data; int err; @@ -413,7 +414,7 @@ static int lp3944_probe(struct i2c_client *client, static int lp3944_remove(struct i2c_client *client) { - struct lp3944_platform_data *pdata = client->dev.platform_data; + struct lp3944_platform_data *pdata = dev_get_platdata(&client->dev); struct lp3944_data *data = i2c_get_clientdata(client); int i; diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 1392feb1bcf7..05188351711d 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -220,17 +220,11 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip, }; unsigned cmd; char c[3]; - int program_size; int nrchars; - int offset = 0; int ret; - int i; - - /* clear program memory before updating */ - for (i = 0; i < LP5521_PROGRAM_LENGTH; i++) - lp55xx_write(chip, addr[idx] + i, 0); + int offset = 0; + int i = 0; - i = 0; while ((offset < size - 1) && (i < LP5521_PROGRAM_LENGTH)) { /* separate sscanfs because length is working only for %s */ ret = sscanf(data + offset, "%2s%n ", c, &nrchars); @@ -250,11 +244,19 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip, if (i % 2) goto err; - program_size = i; - for (i = 0; i < program_size; i++) - lp55xx_write(chip, addr[idx] + i, pattern[i]); + mutex_lock(&chip->lock); - return 0; + for (i = 0; i < LP5521_PROGRAM_LENGTH; i++) { + ret = lp55xx_write(chip, addr[idx] 
+ i, pattern[i]); + if (ret) { + mutex_unlock(&chip->lock); + return -EINVAL; + } + } + + mutex_unlock(&chip->lock); + + return size; err: dev_err(&chip->cl->dev, "wrong pattern format\n"); @@ -365,6 +367,80 @@ static void lp5521_led_brightness_work(struct work_struct *work) mutex_unlock(&chip->lock); } +static ssize_t show_engine_mode(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode; + + switch (mode) { + case LP55XX_ENGINE_RUN: + return sprintf(buf, "run\n"); + case LP55XX_ENGINE_LOAD: + return sprintf(buf, "load\n"); + case LP55XX_ENGINE_DISABLED: + default: + return sprintf(buf, "disabled\n"); + } +} +show_mode(1) +show_mode(2) +show_mode(3) + +static ssize_t store_engine_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + + if (!strncmp(buf, "run", 3)) { + lp5521_run_engine(chip, true); + engine->mode = LP55XX_ENGINE_RUN; + } else if (!strncmp(buf, "load", 4)) { + lp5521_stop_engine(chip); + lp5521_load_engine(chip); + engine->mode = LP55XX_ENGINE_LOAD; + } else if (!strncmp(buf, "disabled", 8)) { + lp5521_stop_engine(chip); + engine->mode = LP55XX_ENGINE_DISABLED; + } + + mutex_unlock(&chip->lock); + + return len; +} +store_mode(1) +store_mode(2) +store_mode(3) + +static ssize_t store_engine_load(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + lp5521_load_engine(chip); + + 
mutex_unlock(&chip->lock); + + return lp5521_update_program_memory(chip, buf, len); +} +store_load(1) +store_load(2) +store_load(3) + static ssize_t lp5521_selftest(struct device *dev, struct device_attribute *attr, char *buf) @@ -381,9 +457,21 @@ static ssize_t lp5521_selftest(struct device *dev, } /* device attributes */ -static DEVICE_ATTR(selftest, S_IRUGO, lp5521_selftest, NULL); +static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode); +static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode); +static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode); +static LP55XX_DEV_ATTR_WO(engine1_load, store_engine1_load); +static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load); +static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load); +static LP55XX_DEV_ATTR_RO(selftest, lp5521_selftest); static struct attribute *lp5521_attributes[] = { + &dev_attr_engine1_mode.attr, + &dev_attr_engine2_mode.attr, + &dev_attr_engine3_mode.attr, + &dev_attr_engine1_load.attr, + &dev_attr_engine2_load.attr, + &dev_attr_engine3_load.attr, &dev_attr_selftest.attr, NULL }; @@ -420,7 +508,7 @@ static int lp5521_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -430,7 +518,7 @@ static int lp5521_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index 3979428f3100..fe3bcbb5747f 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -49,6 +49,9 @@ #define LP5523_REG_RESET 0x3D #define LP5523_REG_LED_TEST_CTRL 0x41 #define LP5523_REG_LED_TEST_ADC 0x42 +#define 
LP5523_REG_CH1_PROG_START 0x4C +#define LP5523_REG_CH2_PROG_START 0x4D +#define LP5523_REG_CH3_PROG_START 0x4E #define LP5523_REG_PROG_PAGE_SEL 0x4F #define LP5523_REG_PROG_MEM 0x50 @@ -65,11 +68,15 @@ #define LP5523_RESET 0xFF #define LP5523_ADC_SHORTCIRC_LIM 80 #define LP5523_EXT_CLK_USED 0x08 +#define LP5523_ENG_STATUS_MASK 0x07 /* Memory Page Selection */ #define LP5523_PAGE_ENG1 0 #define LP5523_PAGE_ENG2 1 #define LP5523_PAGE_ENG3 2 +#define LP5523_PAGE_MUX1 3 +#define LP5523_PAGE_MUX2 4 +#define LP5523_PAGE_MUX3 5 /* Program Memory Operations */ #define LP5523_MODE_ENG1_M 0x30 /* Operation Mode Register */ @@ -94,11 +101,15 @@ #define LP5523_RUN_ENG2 0x08 #define LP5523_RUN_ENG3 0x02 +#define LED_ACTIVE(mux, led) (!!(mux & (0x0001 << led))) + enum lp5523_chip_id { LP5523, LP55231, }; +static int lp5523_init_program_engine(struct lp55xx_chip *chip); + static inline void lp5523_wait_opmode_done(void) { usleep_range(1000, 2000); @@ -134,7 +145,11 @@ static int lp5523_post_init_device(struct lp55xx_chip *chip) if (ret) return ret; - return lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff); + ret = lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff); + if (ret) + return ret; + + return lp5523_init_program_engine(chip); } static void lp5523_load_engine(struct lp55xx_chip *chip) @@ -152,15 +167,21 @@ static void lp5523_load_engine(struct lp55xx_chip *chip) [LP55XX_ENGINE_3] = LP5523_LOAD_ENG3, }; + lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]); + + lp5523_wait_opmode_done(); +} + +static void lp5523_load_engine_and_select_page(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; u8 page_sel[] = { [LP55XX_ENGINE_1] = LP5523_PAGE_ENG1, [LP55XX_ENGINE_2] = LP5523_PAGE_ENG2, [LP55XX_ENGINE_3] = LP5523_PAGE_ENG3, }; - lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]); - - lp5523_wait_opmode_done(); + lp5523_load_engine(chip); lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, page_sel[idx]); } @@ -227,23 
+248,75 @@ static void lp5523_run_engine(struct lp55xx_chip *chip, bool start) lp55xx_update_bits(chip, LP5523_REG_ENABLE, LP5523_EXEC_M, exec); } +static int lp5523_init_program_engine(struct lp55xx_chip *chip) +{ + int i; + int j; + int ret; + u8 status; + /* one pattern per engine setting LED MUX start and stop addresses */ + static const u8 pattern[][LP5523_PROGRAM_LENGTH] = { + { 0x9c, 0x30, 0x9c, 0xb0, 0x9d, 0x80, 0xd8, 0x00, 0}, + { 0x9c, 0x40, 0x9c, 0xc0, 0x9d, 0x80, 0xd8, 0x00, 0}, + { 0x9c, 0x50, 0x9c, 0xd0, 0x9d, 0x80, 0xd8, 0x00, 0}, + }; + + /* hardcode 32 bytes of memory for each engine from program memory */ + ret = lp55xx_write(chip, LP5523_REG_CH1_PROG_START, 0x00); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_CH2_PROG_START, 0x10); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_CH3_PROG_START, 0x20); + if (ret) + return ret; + + /* write LED MUX address space for each engine */ + for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) { + chip->engine_idx = i; + lp5523_load_engine_and_select_page(chip); + + for (j = 0; j < LP5523_PROGRAM_LENGTH; j++) { + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + j, + pattern[i - 1][j]); + if (ret) + goto out; + } + } + + lp5523_run_engine(chip, true); + + /* Let the programs run for couple of ms and check the engine status */ + usleep_range(3000, 6000); + lp55xx_read(chip, LP5523_REG_STATUS, &status); + status &= LP5523_ENG_STATUS_MASK; + + if (status != LP5523_ENG_STATUS_MASK) { + dev_err(&chip->cl->dev, + "cound not configure LED engine, status = 0x%.2x\n", + status); + ret = -1; + } + +out: + lp5523_stop_engine(chip); + return ret; +} + static int lp5523_update_program_memory(struct lp55xx_chip *chip, const u8 *data, size_t size) { u8 pattern[LP5523_PROGRAM_LENGTH] = {0}; unsigned cmd; char c[3]; - int update_size; int nrchars; - int offset = 0; int ret; - int i; - - /* clear program memory before updating */ - for (i = 0; i < LP5523_PROGRAM_LENGTH; i++) - 
lp55xx_write(chip, LP5523_REG_PROG_MEM + i, 0); + int offset = 0; + int i = 0; - i = 0; while ((offset < size - 1) && (i < LP5523_PROGRAM_LENGTH)) { /* separate sscanfs because length is working only for %s */ ret = sscanf(data + offset, "%2s%n ", c, &nrchars); @@ -263,11 +336,19 @@ static int lp5523_update_program_memory(struct lp55xx_chip *chip, if (i % 2) goto err; - update_size = i; - for (i = 0; i < update_size; i++) - lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]); + mutex_lock(&chip->lock); - return 0; + for (i = 0; i < LP5523_PROGRAM_LENGTH; i++) { + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]); + if (ret) { + mutex_unlock(&chip->lock); + return -EINVAL; + } + } + + mutex_unlock(&chip->lock); + + return size; err: dev_err(&chip->cl->dev, "wrong pattern format\n"); @@ -290,10 +371,196 @@ static void lp5523_firmware_loaded(struct lp55xx_chip *chip) * 2) write firmware data into program memory */ - lp5523_load_engine(chip); + lp5523_load_engine_and_select_page(chip); lp5523_update_program_memory(chip, fw->data, fw->size); } +static ssize_t show_engine_mode(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode; + + switch (mode) { + case LP55XX_ENGINE_RUN: + return sprintf(buf, "run\n"); + case LP55XX_ENGINE_LOAD: + return sprintf(buf, "load\n"); + case LP55XX_ENGINE_DISABLED: + default: + return sprintf(buf, "disabled\n"); + } +} +show_mode(1) +show_mode(2) +show_mode(3) + +static ssize_t store_engine_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + + if (!strncmp(buf, "run", 3)) 
{ + lp5523_run_engine(chip, true); + engine->mode = LP55XX_ENGINE_RUN; + } else if (!strncmp(buf, "load", 4)) { + lp5523_stop_engine(chip); + lp5523_load_engine(chip); + engine->mode = LP55XX_ENGINE_LOAD; + } else if (!strncmp(buf, "disabled", 8)) { + lp5523_stop_engine(chip); + engine->mode = LP55XX_ENGINE_DISABLED; + } + + mutex_unlock(&chip->lock); + + return len; +} +store_mode(1) +store_mode(2) +store_mode(3) + +static int lp5523_mux_parse(const char *buf, u16 *mux, size_t len) +{ + u16 tmp_mux = 0; + int i; + + len = min_t(int, len, LP5523_MAX_LEDS); + + for (i = 0; i < len; i++) { + switch (buf[i]) { + case '1': + tmp_mux |= (1 << i); + break; + case '0': + break; + case '\n': + i = len; + break; + default: + return -1; + } + } + *mux = tmp_mux; + + return 0; +} + +static void lp5523_mux_to_array(u16 led_mux, char *array) +{ + int i, pos = 0; + for (i = 0; i < LP5523_MAX_LEDS; i++) + pos += sprintf(array + pos, "%x", LED_ACTIVE(led_mux, i)); + + array[pos] = '\0'; +} + +static ssize_t show_engine_leds(struct device *dev, + struct device_attribute *attr, + char *buf, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + char mux[LP5523_MAX_LEDS + 1]; + + lp5523_mux_to_array(chip->engines[nr - 1].led_mux, mux); + + return sprintf(buf, "%s\n", mux); +} +show_leds(1) +show_leds(2) +show_leds(3) + +static int lp5523_load_mux(struct lp55xx_chip *chip, u16 mux, int nr) +{ + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + int ret; + u8 mux_page[] = { + [LP55XX_ENGINE_1] = LP5523_PAGE_MUX1, + [LP55XX_ENGINE_2] = LP5523_PAGE_MUX2, + [LP55XX_ENGINE_3] = LP5523_PAGE_MUX3, + }; + + lp5523_load_engine(chip); + + ret = lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, mux_page[nr]); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM , (u8)(mux >> 8)); + if (ret) + return ret; + + ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + 1, (u8)(mux)); + if (ret) + return ret; + + 
engine->led_mux = mux; + return 0; +} + +static ssize_t store_engine_leds(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_engine *engine = &chip->engines[nr - 1]; + u16 mux = 0; + ssize_t ret; + + if (lp5523_mux_parse(buf, &mux, len)) + return -EINVAL; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + ret = -EINVAL; + + if (engine->mode != LP55XX_ENGINE_LOAD) + goto leave; + + if (lp5523_load_mux(chip, mux, nr)) + goto leave; + + ret = len; +leave: + mutex_unlock(&chip->lock); + return ret; +} +store_leds(1) +store_leds(2) +store_leds(3) + +static ssize_t store_engine_load(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len, int nr) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + + chip->engine_idx = nr; + lp5523_load_engine_and_select_page(chip); + + mutex_unlock(&chip->lock); + + return lp5523_update_program_memory(chip, buf, len); +} +store_load(1) +store_load(2) +store_load(3) + static ssize_t lp5523_selftest(struct device *dev, struct device_attribute *attr, char *buf) @@ -393,9 +660,27 @@ static void lp5523_led_brightness_work(struct work_struct *work) mutex_unlock(&chip->lock); } -static DEVICE_ATTR(selftest, S_IRUGO, lp5523_selftest, NULL); +static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode); +static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode); +static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode); +static LP55XX_DEV_ATTR_RW(engine1_leds, show_engine1_leds, store_engine1_leds); +static LP55XX_DEV_ATTR_RW(engine2_leds, show_engine2_leds, store_engine2_leds); +static LP55XX_DEV_ATTR_RW(engine3_leds, show_engine3_leds, store_engine3_leds); +static LP55XX_DEV_ATTR_WO(engine1_load, 
store_engine1_load); +static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load); +static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load); +static LP55XX_DEV_ATTR_RO(selftest, lp5523_selftest); static struct attribute *lp5523_attributes[] = { + &dev_attr_engine1_mode.attr, + &dev_attr_engine2_mode.attr, + &dev_attr_engine3_mode.attr, + &dev_attr_engine1_load.attr, + &dev_attr_engine2_load.attr, + &dev_attr_engine3_load.attr, + &dev_attr_engine1_leds.attr, + &dev_attr_engine2_leds.attr, + &dev_attr_engine3_leds.attr, &dev_attr_selftest.attr, NULL, }; @@ -432,7 +717,7 @@ static int lp5523_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -442,7 +727,7 @@ static int lp5523_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c index cbd856dac150..2585cfd57711 100644 --- a/drivers/leds/leds-lp5562.c +++ b/drivers/leds/leds-lp5562.c @@ -477,8 +477,8 @@ static ssize_t lp5562_store_engine_mux(struct device *dev, return len; } -static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern); -static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux); +static LP55XX_DEV_ATTR_WO(led_pattern, lp5562_store_pattern); +static LP55XX_DEV_ATTR_WO(engine_mux, lp5562_store_engine_mux); static struct attribute *lp5562_attributes[] = { &dev_attr_led_pattern.attr, @@ -518,7 +518,7 @@ static int lp5562_probe(struct i2c_client *client, struct lp55xx_platform_data *pdata; struct device_node *np = client->dev.of_node; - if (!client->dev.platform_data) { + if (!dev_get_platdata(&client->dev)) { if (np) { ret = 
lp55xx_of_populate_pdata(&client->dev, np); if (ret < 0) @@ -528,7 +528,7 @@ static int lp5562_probe(struct i2c_client *client, return -EINVAL; } } - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); if (!chip) diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index c2fecd4d391c..351825b96f16 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -593,6 +593,9 @@ int lp55xx_of_populate_pdata(struct device *dev, struct device_node *np) of_property_read_string(np, "label", &pdata->label); of_property_read_u8(np, "clock-mode", &pdata->clock_mode); + /* LP8501 specific */ + of_property_read_u8(np, "pwr-sel", (u8 *)&pdata->pwr_sel); + dev->platform_data = pdata; return 0; diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h index dbbf86df0f1f..cceab483edd0 100644 --- a/drivers/leds/leds-lp55xx-common.h +++ b/drivers/leds/leds-lp55xx-common.h @@ -20,8 +20,62 @@ enum lp55xx_engine_index { LP55XX_ENGINE_1, LP55XX_ENGINE_2, LP55XX_ENGINE_3, + LP55XX_ENGINE_MAX = LP55XX_ENGINE_3, }; +enum lp55xx_engine_mode { + LP55XX_ENGINE_DISABLED, + LP55XX_ENGINE_LOAD, + LP55XX_ENGINE_RUN, +}; + +#define LP55XX_DEV_ATTR_RW(name, show, store) \ + DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show, store) +#define LP55XX_DEV_ATTR_RO(name, show) \ + DEVICE_ATTR(name, S_IRUGO, show, NULL) +#define LP55XX_DEV_ATTR_WO(name, store) \ + DEVICE_ATTR(name, S_IWUSR, NULL, store) + +#define show_mode(nr) \ +static ssize_t show_engine##nr##_mode(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return show_engine_mode(dev, attr, buf, nr); \ +} + +#define store_mode(nr) \ +static ssize_t store_engine##nr##_mode(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_mode(dev, attr, buf, len, nr); \ +} + +#define show_leds(nr) \ +static 
ssize_t show_engine##nr##_leds(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return show_engine_leds(dev, attr, buf, nr); \ +} + +#define store_leds(nr) \ +static ssize_t store_engine##nr##_leds(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_leds(dev, attr, buf, len, nr); \ +} + +#define store_load(nr) \ +static ssize_t store_engine##nr##_load(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return store_engine_load(dev, attr, buf, len, nr); \ +} + struct lp55xx_led; struct lp55xx_chip; @@ -72,6 +126,16 @@ struct lp55xx_device_config { }; /* + * struct lp55xx_engine + * @mode : Engine mode + * @led_mux : Mux bits for LED selection. Only used in LP5523 + */ +struct lp55xx_engine { + enum lp55xx_engine_mode mode; + u16 led_mux; +}; + +/* * struct lp55xx_chip * @cl : I2C communication for access registers * @pdata : Platform specific data @@ -79,6 +143,7 @@ struct lp55xx_device_config { * @num_leds : Number of registered LEDs * @cfg : Device specific configuration data * @engine_idx : Selected engine number + * @engines : Engine structure for the device attribute R/W interface * @fw : Firmware data for running a LED pattern */ struct lp55xx_chip { @@ -89,6 +154,7 @@ struct lp55xx_chip { int num_leds; struct lp55xx_device_config *cfg; enum lp55xx_engine_index engine_idx; + struct lp55xx_engine engines[LP55XX_ENGINE_MAX]; const struct firmware *fw; }; diff --git a/drivers/leds/leds-lp8501.c b/drivers/leds/leds-lp8501.c new file mode 100644 index 000000000000..8d55a780ca46 --- /dev/null +++ b/drivers/leds/leds-lp8501.c @@ -0,0 +1,410 @@ +/* + * TI LP8501 9 channel LED Driver + * + * Copyright (C) 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as 
published by the Free Software Foundation. + * + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/platform_data/leds-lp55xx.h> +#include <linux/slab.h> + +#include "leds-lp55xx-common.h" + +#define LP8501_PROGRAM_LENGTH 32 +#define LP8501_MAX_LEDS 9 + +/* Registers */ +#define LP8501_REG_ENABLE 0x00 +#define LP8501_ENABLE BIT(6) +#define LP8501_EXEC_M 0x3F +#define LP8501_EXEC_ENG1_M 0x30 +#define LP8501_EXEC_ENG2_M 0x0C +#define LP8501_EXEC_ENG3_M 0x03 +#define LP8501_RUN_ENG1 0x20 +#define LP8501_RUN_ENG2 0x08 +#define LP8501_RUN_ENG3 0x02 + +#define LP8501_REG_OP_MODE 0x01 +#define LP8501_MODE_ENG1_M 0x30 +#define LP8501_MODE_ENG2_M 0x0C +#define LP8501_MODE_ENG3_M 0x03 +#define LP8501_LOAD_ENG1 0x10 +#define LP8501_LOAD_ENG2 0x04 +#define LP8501_LOAD_ENG3 0x01 + +#define LP8501_REG_PWR_CONFIG 0x05 +#define LP8501_PWR_CONFIG_M 0x03 + +#define LP8501_REG_LED_PWM_BASE 0x16 + +#define LP8501_REG_LED_CURRENT_BASE 0x26 + +#define LP8501_REG_CONFIG 0x36 +#define LP8501_PWM_PSAVE BIT(7) +#define LP8501_AUTO_INC BIT(6) +#define LP8501_PWR_SAVE BIT(5) +#define LP8501_CP_AUTO 0x18 +#define LP8501_INT_CLK BIT(0) +#define LP8501_DEFAULT_CFG \ + (LP8501_PWM_PSAVE | LP8501_AUTO_INC | LP8501_PWR_SAVE | LP8501_CP_AUTO) + +#define LP8501_REG_RESET 0x3D +#define LP8501_RESET 0xFF + +#define LP8501_REG_PROG_PAGE_SEL 0x4F +#define LP8501_PAGE_ENG1 0 +#define LP8501_PAGE_ENG2 1 +#define LP8501_PAGE_ENG3 2 + +#define LP8501_REG_PROG_MEM 0x50 + +#define LP8501_ENG1_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG1_M) == LP8501_LOAD_ENG1) +#define LP8501_ENG2_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG2_M) == LP8501_LOAD_ENG2) +#define LP8501_ENG3_IS_LOADING(mode) \ + ((mode & LP8501_MODE_ENG3_M) == LP8501_LOAD_ENG3) + +static inline void lp8501_wait_opmode_done(void) +{ + usleep_range(1000, 2000); +} + +static void 
lp8501_set_led_current(struct lp55xx_led *led, u8 led_current) +{ + led->led_current = led_current; + lp55xx_write(led->chip, LP8501_REG_LED_CURRENT_BASE + led->chan_nr, + led_current); +} + +static int lp8501_post_init_device(struct lp55xx_chip *chip) +{ + int ret; + u8 val = LP8501_DEFAULT_CFG; + + ret = lp55xx_write(chip, LP8501_REG_ENABLE, LP8501_ENABLE); + if (ret) + return ret; + + /* Chip startup time is 500 us, 1 - 2 ms gives some margin */ + usleep_range(1000, 2000); + + if (chip->pdata->clock_mode != LP55XX_CLOCK_EXT) + val |= LP8501_INT_CLK; + + ret = lp55xx_write(chip, LP8501_REG_CONFIG, val); + if (ret) + return ret; + + /* Power selection for each output */ + return lp55xx_update_bits(chip, LP8501_REG_PWR_CONFIG, + LP8501_PWR_CONFIG_M, chip->pdata->pwr_sel); +} + +static void lp8501_load_engine(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 mask[] = { + [LP55XX_ENGINE_1] = LP8501_MODE_ENG1_M, + [LP55XX_ENGINE_2] = LP8501_MODE_ENG2_M, + [LP55XX_ENGINE_3] = LP8501_MODE_ENG3_M, + }; + + u8 val[] = { + [LP55XX_ENGINE_1] = LP8501_LOAD_ENG1, + [LP55XX_ENGINE_2] = LP8501_LOAD_ENG2, + [LP55XX_ENGINE_3] = LP8501_LOAD_ENG3, + }; + + u8 page_sel[] = { + [LP55XX_ENGINE_1] = LP8501_PAGE_ENG1, + [LP55XX_ENGINE_2] = LP8501_PAGE_ENG2, + [LP55XX_ENGINE_3] = LP8501_PAGE_ENG3, + }; + + lp55xx_update_bits(chip, LP8501_REG_OP_MODE, mask[idx], val[idx]); + + lp8501_wait_opmode_done(); + + lp55xx_write(chip, LP8501_REG_PROG_PAGE_SEL, page_sel[idx]); +} + +static void lp8501_stop_engine(struct lp55xx_chip *chip) +{ + lp55xx_write(chip, LP8501_REG_OP_MODE, 0); + lp8501_wait_opmode_done(); +} + +static void lp8501_turn_off_channels(struct lp55xx_chip *chip) +{ + int i; + + for (i = 0; i < LP8501_MAX_LEDS; i++) + lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + i, 0); +} + +static void lp8501_run_engine(struct lp55xx_chip *chip, bool start) +{ + int ret; + u8 mode; + u8 exec; + + /* stop engine */ + if (!start) { + lp8501_stop_engine(chip); + 
lp8501_turn_off_channels(chip); + return; + } + + /* + * To run the engine, + * operation mode and enable register should updated at the same time + */ + + ret = lp55xx_read(chip, LP8501_REG_OP_MODE, &mode); + if (ret) + return; + + ret = lp55xx_read(chip, LP8501_REG_ENABLE, &exec); + if (ret) + return; + + /* change operation mode to RUN only when each engine is loading */ + if (LP8501_ENG1_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG1_M) | LP8501_RUN_ENG1; + exec = (exec & ~LP8501_EXEC_ENG1_M) | LP8501_RUN_ENG1; + } + + if (LP8501_ENG2_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG2_M) | LP8501_RUN_ENG2; + exec = (exec & ~LP8501_EXEC_ENG2_M) | LP8501_RUN_ENG2; + } + + if (LP8501_ENG3_IS_LOADING(mode)) { + mode = (mode & ~LP8501_MODE_ENG3_M) | LP8501_RUN_ENG3; + exec = (exec & ~LP8501_EXEC_ENG3_M) | LP8501_RUN_ENG3; + } + + lp55xx_write(chip, LP8501_REG_OP_MODE, mode); + lp8501_wait_opmode_done(); + + lp55xx_update_bits(chip, LP8501_REG_ENABLE, LP8501_EXEC_M, exec); +} + +static int lp8501_update_program_memory(struct lp55xx_chip *chip, + const u8 *data, size_t size) +{ + u8 pattern[LP8501_PROGRAM_LENGTH] = {0}; + unsigned cmd; + char c[3]; + int update_size; + int nrchars; + int offset = 0; + int ret; + int i; + + /* clear program memory before updating */ + for (i = 0; i < LP8501_PROGRAM_LENGTH; i++) + lp55xx_write(chip, LP8501_REG_PROG_MEM + i, 0); + + i = 0; + while ((offset < size - 1) && (i < LP8501_PROGRAM_LENGTH)) { + /* separate sscanfs because length is working only for %s */ + ret = sscanf(data + offset, "%2s%n ", c, &nrchars); + if (ret != 1) + goto err; + + ret = sscanf(c, "%2x", &cmd); + if (ret != 1) + goto err; + + pattern[i] = (u8)cmd; + offset += nrchars; + i++; + } + + /* Each instruction is 16bit long. 
Check that length is even */ + if (i % 2) + goto err; + + update_size = i; + for (i = 0; i < update_size; i++) + lp55xx_write(chip, LP8501_REG_PROG_MEM + i, pattern[i]); + + return 0; + +err: + dev_err(&chip->cl->dev, "wrong pattern format\n"); + return -EINVAL; +} + +static void lp8501_firmware_loaded(struct lp55xx_chip *chip) +{ + const struct firmware *fw = chip->fw; + + if (fw->size > LP8501_PROGRAM_LENGTH) { + dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n", + fw->size); + return; + } + + /* + * Program momery sequence + * 1) set engine mode to "LOAD" + * 2) write firmware data into program memory + */ + + lp8501_load_engine(chip); + lp8501_update_program_memory(chip, fw->data, fw->size); +} + +static void lp8501_led_brightness_work(struct work_struct *work) +{ + struct lp55xx_led *led = container_of(work, struct lp55xx_led, + brightness_work); + struct lp55xx_chip *chip = led->chip; + + mutex_lock(&chip->lock); + lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + led->chan_nr, + led->brightness); + mutex_unlock(&chip->lock); +} + +/* Chip specific configurations */ +static struct lp55xx_device_config lp8501_cfg = { + .reset = { + .addr = LP8501_REG_RESET, + .val = LP8501_RESET, + }, + .enable = { + .addr = LP8501_REG_ENABLE, + .val = LP8501_ENABLE, + }, + .max_channel = LP8501_MAX_LEDS, + .post_init_device = lp8501_post_init_device, + .brightness_work_fn = lp8501_led_brightness_work, + .set_led_current = lp8501_set_led_current, + .firmware_cb = lp8501_firmware_loaded, + .run_engine = lp8501_run_engine, +}; + +static int lp8501_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct lp55xx_chip *chip; + struct lp55xx_led *led; + struct lp55xx_platform_data *pdata; + struct device_node *np = client->dev.of_node; + + if (!dev_get_platdata(&client->dev)) { + if (np) { + ret = lp55xx_of_populate_pdata(&client->dev, np); + if (ret < 0) + return ret; + } else { + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; 
+ } + } + pdata = dev_get_platdata(&client->dev); + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + led = devm_kzalloc(&client->dev, + sizeof(*led) * pdata->num_channels, GFP_KERNEL); + if (!led) + return -ENOMEM; + + chip->cl = client; + chip->pdata = pdata; + chip->cfg = &lp8501_cfg; + + mutex_init(&chip->lock); + + i2c_set_clientdata(client, led); + + ret = lp55xx_init_device(chip); + if (ret) + goto err_init; + + dev_info(&client->dev, "%s Programmable led chip found\n", id->name); + + ret = lp55xx_register_leds(led, chip); + if (ret) + goto err_register_leds; + + ret = lp55xx_register_sysfs(chip); + if (ret) { + dev_err(&client->dev, "registering sysfs failed\n"); + goto err_register_sysfs; + } + + return 0; + +err_register_sysfs: + lp55xx_unregister_leds(led, chip); +err_register_leds: + lp55xx_deinit_device(chip); +err_init: + return ret; +} + +static int lp8501_remove(struct i2c_client *client) +{ + struct lp55xx_led *led = i2c_get_clientdata(client); + struct lp55xx_chip *chip = led->chip; + + lp8501_stop_engine(chip); + lp55xx_unregister_sysfs(chip); + lp55xx_unregister_leds(led, chip); + lp55xx_deinit_device(chip); + + return 0; +} + +static const struct i2c_device_id lp8501_id[] = { + { "lp8501", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp8501_id); + +#ifdef CONFIG_OF +static const struct of_device_id of_lp8501_leds_match[] = { + { .compatible = "ti,lp8501", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, of_lp8501_leds_match); +#endif + +static struct i2c_driver lp8501_driver = { + .driver = { + .name = "lp8501", + .of_match_table = of_match_ptr(of_lp8501_leds_match), + }, + .probe = lp8501_probe, + .remove = lp8501_remove, + .id_table = lp8501_id, +}; + +module_i2c_driver(lp8501_driver); + +MODULE_DESCRIPTION("Texas Instruments LP8501 LED drvier"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c index ca48a7d5502d..3417e5be7b57 100644 
--- a/drivers/leds/leds-lt3593.c +++ b/drivers/leds/leds-lt3593.c @@ -135,7 +135,7 @@ static void delete_lt3593_led(struct lt3593_led_data *led) static int lt3593_led_probe(struct platform_device *pdev) { - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct lt3593_led_data *leds_data; int i, ret = 0; @@ -169,7 +169,7 @@ err: static int lt3593_led_remove(struct platform_device *pdev) { int i; - struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct lt3593_led_data *leds_data; leds_data = platform_get_drvdata(pdev); diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index c61c5ebcc08e..2f9f141084ba 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -306,7 +306,7 @@ create_netxbig_led(struct platform_device *pdev, struct netxbig_led_data *led_dat, const struct netxbig_led *template) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); int ret; spin_lock_init(&led_dat->lock); @@ -354,7 +354,7 @@ create_netxbig_led(struct platform_device *pdev, static int netxbig_led_probe(struct platform_device *pdev) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct netxbig_led_data *leds_data; int i; int ret; @@ -391,7 +391,7 @@ err_free_leds: static int netxbig_led_remove(struct platform_device *pdev) { - struct netxbig_led_platform_data *pdata = pdev->dev.platform_data; + struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct netxbig_led_data *leds_data; int i; diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index e7df9875c400..141f13438e80 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -321,7 +321,7 @@ static 
inline int sizeof_ns2_led_priv(int num_leds) static int ns2_led_probe(struct platform_device *pdev) { - struct ns2_led_platform_data *pdata = pdev->dev.platform_data; + struct ns2_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct ns2_led_priv *priv; int i; int ret; diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 0c597bdd23f9..4a0e786b7832 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -446,7 +446,8 @@ static int pca9532_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pca9532_data *data = i2c_get_clientdata(client); - struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data; + struct pca9532_platform_data *pca9532_pdata = + dev_get_platdata(&client->dev); if (!pca9532_pdata) return -EIO; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index edf485b773c8..c3a08b60535b 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -267,7 +267,7 @@ static int pca955x_probe(struct i2c_client *client, chip = &pca955x_chipdefs[id->driver_data]; adapter = to_i2c_adapter(client->dev.parent); - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); /* Make sure the slave address / chip type combo given is possible */ if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) != diff --git a/drivers/leds/leds-pca9633.c b/drivers/leds/leds-pca9633.c deleted file mode 100644 index 9aae5679ffb2..000000000000 --- a/drivers/leds/leds-pca9633.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright 2011 bct electronic GmbH - * - * Author: Peter Meerwald <p.meerwald@bct-electronic.com> - * - * Based on leds-pca955x.c - * - * This file is subject to the terms and conditions of version 2 of - * the GNU General Public License. See the file COPYING in the main - * directory of this archive for more details. 
- * - * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62) - * - */ - -#include <linux/module.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/leds.h> -#include <linux/err.h> -#include <linux/i2c.h> -#include <linux/workqueue.h> -#include <linux/slab.h> -#include <linux/platform_data/leds-pca9633.h> - -/* LED select registers determine the source that drives LED outputs */ -#define PCA9633_LED_OFF 0x0 /* LED driver off */ -#define PCA9633_LED_ON 0x1 /* LED driver on */ -#define PCA9633_LED_PWM 0x2 /* Controlled through PWM */ -#define PCA9633_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ - -#define PCA9633_MODE1 0x00 -#define PCA9633_MODE2 0x01 -#define PCA9633_PWM_BASE 0x02 -#define PCA9633_LEDOUT 0x08 - -static const struct i2c_device_id pca9633_id[] = { - { "pca9633", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, pca9633_id); - -struct pca9633_led { - struct i2c_client *client; - struct work_struct work; - enum led_brightness brightness; - struct led_classdev led_cdev; - int led_num; /* 0 .. 
3 potentially */ - char name[32]; -}; - -static void pca9633_led_work(struct work_struct *work) -{ - struct pca9633_led *pca9633 = container_of(work, - struct pca9633_led, work); - u8 ledout = i2c_smbus_read_byte_data(pca9633->client, PCA9633_LEDOUT); - int shift = 2 * pca9633->led_num; - u8 mask = 0x3 << shift; - - switch (pca9633->brightness) { - case LED_FULL: - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - (ledout & ~mask) | (PCA9633_LED_ON << shift)); - break; - case LED_OFF: - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - ledout & ~mask); - break; - default: - i2c_smbus_write_byte_data(pca9633->client, - PCA9633_PWM_BASE + pca9633->led_num, - pca9633->brightness); - i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT, - (ledout & ~mask) | (PCA9633_LED_PWM << shift)); - break; - } -} - -static void pca9633_led_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - struct pca9633_led *pca9633; - - pca9633 = container_of(led_cdev, struct pca9633_led, led_cdev); - - pca9633->brightness = value; - - /* - * Must use workqueue for the actual I/O since I2C operations - * can sleep. 
- */ - schedule_work(&pca9633->work); -} - -static int pca9633_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct pca9633_led *pca9633; - struct pca9633_platform_data *pdata; - int i, err; - - pdata = client->dev.platform_data; - - if (pdata) { - if (pdata->leds.num_leds <= 0 || pdata->leds.num_leds > 4) { - dev_err(&client->dev, "board info must claim at most 4 LEDs"); - return -EINVAL; - } - } - - pca9633 = devm_kzalloc(&client->dev, 4 * sizeof(*pca9633), GFP_KERNEL); - if (!pca9633) - return -ENOMEM; - - i2c_set_clientdata(client, pca9633); - - for (i = 0; i < 4; i++) { - pca9633[i].client = client; - pca9633[i].led_num = i; - - /* Platform data can specify LED names and default triggers */ - if (pdata && i < pdata->leds.num_leds) { - if (pdata->leds.leds[i].name) - snprintf(pca9633[i].name, - sizeof(pca9633[i].name), "pca9633:%s", - pdata->leds.leds[i].name); - if (pdata->leds.leds[i].default_trigger) - pca9633[i].led_cdev.default_trigger = - pdata->leds.leds[i].default_trigger; - } else { - snprintf(pca9633[i].name, sizeof(pca9633[i].name), - "pca9633:%d", i); - } - - pca9633[i].led_cdev.name = pca9633[i].name; - pca9633[i].led_cdev.brightness_set = pca9633_led_set; - - INIT_WORK(&pca9633[i].work, pca9633_led_work); - - err = led_classdev_register(&client->dev, &pca9633[i].led_cdev); - if (err < 0) - goto exit; - } - - /* Disable LED all-call address and set normal mode */ - i2c_smbus_write_byte_data(client, PCA9633_MODE1, 0x00); - - /* Configure output: open-drain or totem pole (push-pull) */ - if (pdata && pdata->outdrv == PCA9633_OPEN_DRAIN) - i2c_smbus_write_byte_data(client, PCA9633_MODE2, 0x01); - - /* Turn off LEDs */ - i2c_smbus_write_byte_data(client, PCA9633_LEDOUT, 0x00); - - return 0; - -exit: - while (i--) { - led_classdev_unregister(&pca9633[i].led_cdev); - cancel_work_sync(&pca9633[i].work); - } - - return err; -} - -static int pca9633_remove(struct i2c_client *client) -{ - struct pca9633_led *pca9633 = 
i2c_get_clientdata(client); - int i; - - for (i = 0; i < 4; i++) { - led_classdev_unregister(&pca9633[i].led_cdev); - cancel_work_sync(&pca9633[i].work); - } - - return 0; -} - -static struct i2c_driver pca9633_driver = { - .driver = { - .name = "leds-pca9633", - .owner = THIS_MODULE, - }, - .probe = pca9633_probe, - .remove = pca9633_remove, - .id_table = pca9633_id, -}; - -module_i2c_driver(pca9633_driver); - -MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>"); -MODULE_DESCRIPTION("PCA9633 LED driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c new file mode 100644 index 000000000000..82589c0a5689 --- /dev/null +++ b/drivers/leds/leds-pca963x.c @@ -0,0 +1,461 @@ +/* + * Copyright 2011 bct electronic GmbH + * Copyright 2013 Qtechnology/AS + * + * Author: Peter Meerwald <p.meerwald@bct-electronic.com> + * Author: Ricardo Ribalda <ricardo.ribalda@gmail.com> + * + * Based on leds-pca955x.c + * + * This file is subject to the terms and conditions of version 2 of + * the GNU General Public License. See the file COPYING in the main + * directory of this archive for more details. + * + * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62) + * LED driver for the PCA9634 I2C LED driver (7-bit slave address set by hw.) + * + * Note that hardware blinking violates the leds infrastructure driver + * interface since the hardware only supports blinking all LEDs with the + * same delay_on/delay_off rates. That is, only the LEDs that are set to + * blink will actually blink but all LEDs that are set to blink will blink + * in identical fashion. The delay_on/delay_off values of the last LED + * that is set to blink will be used for all of the blinking LEDs. + * Hardware blinking is disabled by default but can be enabled by setting + * the 'blink_type' member in the platform_data struct to 'PCA963X_HW_BLINK' + * or by adding the 'nxp,hw-blink' property to the DTS. 
+ */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/leds.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/workqueue.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/platform_data/leds-pca963x.h> + +/* LED select registers determine the source that drives LED outputs */ +#define PCA963X_LED_OFF 0x0 /* LED driver off */ +#define PCA963X_LED_ON 0x1 /* LED driver on */ +#define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ +#define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ + +#define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ + +#define PCA963X_MODE1 0x00 +#define PCA963X_MODE2 0x01 +#define PCA963X_PWM_BASE 0x02 + +enum pca963x_type { + pca9633, + pca9634, +}; + +struct pca963x_chipdef { + u8 grppwm; + u8 grpfreq; + u8 ledout_base; + int n_leds; +}; + +static struct pca963x_chipdef pca963x_chipdefs[] = { + [pca9633] = { + .grppwm = 0x6, + .grpfreq = 0x7, + .ledout_base = 0x8, + .n_leds = 4, + }, + [pca9634] = { + .grppwm = 0xa, + .grpfreq = 0xb, + .ledout_base = 0xc, + .n_leds = 8, + }, +}; + +/* Total blink period in milliseconds */ +#define PCA963X_BLINK_PERIOD_MIN 42 +#define PCA963X_BLINK_PERIOD_MAX 10667 + +static const struct i2c_device_id pca963x_id[] = { + { "pca9632", pca9633 }, + { "pca9633", pca9633 }, + { "pca9634", pca9634 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pca963x_id); + +enum pca963x_cmd { + BRIGHTNESS_SET, + BLINK_SET, +}; + +struct pca963x_led; + +struct pca963x { + struct pca963x_chipdef *chipdef; + struct mutex mutex; + struct i2c_client *client; + struct pca963x_led *leds; +}; + +struct pca963x_led { + struct pca963x *chip; + struct work_struct work; + enum led_brightness brightness; + struct led_classdev led_cdev; + int led_num; /* 0 .. 
7 potentially */ + enum pca963x_cmd cmd; + char name[32]; + u8 gdc; + u8 gfrq; +}; + +static void pca963x_brightness_work(struct pca963x_led *pca963x) +{ + u8 ledout_addr = pca963x->chip->chipdef->ledout_base + + (pca963x->led_num / 4); + u8 ledout; + int shift = 2 * (pca963x->led_num % 4); + u8 mask = 0x3 << shift; + + mutex_lock(&pca963x->chip->mutex); + ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr); + switch (pca963x->brightness) { + case LED_FULL: + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_ON << shift)); + break; + case LED_OFF: + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + ledout & ~mask); + break; + default: + i2c_smbus_write_byte_data(pca963x->chip->client, + PCA963X_PWM_BASE + pca963x->led_num, + pca963x->brightness); + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_PWM << shift)); + break; + } + mutex_unlock(&pca963x->chip->mutex); +} + +static void pca963x_blink_work(struct pca963x_led *pca963x) +{ + u8 ledout_addr = pca963x->chip->chipdef->ledout_base + + (pca963x->led_num / 4); + u8 ledout; + u8 mode2 = i2c_smbus_read_byte_data(pca963x->chip->client, + PCA963X_MODE2); + int shift = 2 * (pca963x->led_num % 4); + u8 mask = 0x3 << shift; + + i2c_smbus_write_byte_data(pca963x->chip->client, + pca963x->chip->chipdef->grppwm, pca963x->gdc); + + i2c_smbus_write_byte_data(pca963x->chip->client, + pca963x->chip->chipdef->grpfreq, pca963x->gfrq); + + if (!(mode2 & PCA963X_MODE2_DMBLNK)) + i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, + mode2 | PCA963X_MODE2_DMBLNK); + + mutex_lock(&pca963x->chip->mutex); + ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr); + if ((ledout & mask) != (PCA963X_LED_GRP_PWM << shift)) + i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr, + (ledout & ~mask) | (PCA963X_LED_GRP_PWM << shift)); + mutex_unlock(&pca963x->chip->mutex); +} + +static 
void pca963x_work(struct work_struct *work) +{ + struct pca963x_led *pca963x = container_of(work, + struct pca963x_led, work); + + switch (pca963x->cmd) { + case BRIGHTNESS_SET: + pca963x_brightness_work(pca963x); + break; + case BLINK_SET: + pca963x_blink_work(pca963x); + break; + } +} + +static void pca963x_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct pca963x_led *pca963x; + + pca963x = container_of(led_cdev, struct pca963x_led, led_cdev); + + pca963x->cmd = BRIGHTNESS_SET; + pca963x->brightness = value; + + /* + * Must use workqueue for the actual I/O since I2C operations + * can sleep. + */ + schedule_work(&pca963x->work); +} + +static int pca963x_blink_set(struct led_classdev *led_cdev, + unsigned long *delay_on, unsigned long *delay_off) +{ + struct pca963x_led *pca963x; + unsigned long time_on, time_off, period; + u8 gdc, gfrq; + + pca963x = container_of(led_cdev, struct pca963x_led, led_cdev); + + time_on = *delay_on; + time_off = *delay_off; + + /* If both zero, pick reasonable defaults of 500ms each */ + if (!time_on && !time_off) { + time_on = 500; + time_off = 500; + } + + period = time_on + time_off; + + /* If period not supported by hardware, default to someting sane. */ + if ((period < PCA963X_BLINK_PERIOD_MIN) || + (period > PCA963X_BLINK_PERIOD_MAX)) { + time_on = 500; + time_off = 500; + period = time_on + time_off; + } + + /* + * From manual: duty cycle = (GDC / 256) -> + * (time_on / period) = (GDC / 256) -> + * GDC = ((time_on * 256) / period) + */ + gdc = (time_on * 256) / period; + + /* + * From manual: period = ((GFRQ + 1) / 24) in seconds. + * So, period (in ms) = (((GFRQ + 1) / 24) * 1000) -> + * GFRQ = ((period * 24 / 1000) - 1) + */ + gfrq = (period * 24 / 1000) - 1; + + pca963x->cmd = BLINK_SET; + pca963x->gdc = gdc; + pca963x->gfrq = gfrq; + + /* + * Must use workqueue for the actual I/O since I2C operations + * can sleep. 
+ */ + schedule_work(&pca963x->work); + + *delay_on = time_on; + *delay_off = time_off; + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static struct pca963x_platform_data * +pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip) +{ + struct device_node *np = client->dev.of_node, *child; + struct pca963x_platform_data *pdata; + struct led_info *pca963x_leds; + int count; + + count = of_get_child_count(np); + if (!count || count > chip->n_leds) + return ERR_PTR(-ENODEV); + + pca963x_leds = devm_kzalloc(&client->dev, + sizeof(struct led_info) * chip->n_leds, GFP_KERNEL); + if (!pca963x_leds) + return ERR_PTR(-ENOMEM); + + for_each_child_of_node(np, child) { + struct led_info led; + u32 reg; + int res; + + res = of_property_read_u32(child, "reg", &reg); + if ((res != 0) || (reg >= chip->n_leds)) + continue; + led.name = + of_get_property(child, "label", NULL) ? : child->name; + led.default_trigger = + of_get_property(child, "linux,default-trigger", NULL); + pca963x_leds[reg] = led; + } + pdata = devm_kzalloc(&client->dev, + sizeof(struct pca963x_platform_data), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->leds.leds = pca963x_leds; + pdata->leds.num_leds = chip->n_leds; + + /* default to open-drain unless totem pole (push-pull) is specified */ + if (of_property_read_bool(np, "nxp,totem-pole")) + pdata->outdrv = PCA963X_TOTEM_POLE; + else + pdata->outdrv = PCA963X_OPEN_DRAIN; + + /* default to software blinking unless hardware blinking is specified */ + if (of_property_read_bool(np, "nxp,hw-blink")) + pdata->blink_type = PCA963X_HW_BLINK; + else + pdata->blink_type = PCA963X_SW_BLINK; + + return pdata; +} + +static const struct of_device_id of_pca963x_match[] = { + { .compatible = "nxp,pca9632", }, + { .compatible = "nxp,pca9633", }, + { .compatible = "nxp,pca9634", }, + {}, +}; +#else +static struct pca963x_platform_data * +pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip) +{ + return ERR_PTR(-ENODEV); +} +#endif
+ +static int pca963x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pca963x *pca963x_chip; + struct pca963x_led *pca963x; + struct pca963x_platform_data *pdata; + struct pca963x_chipdef *chip; + int i, err; + + chip = &pca963x_chipdefs[id->driver_data]; + pdata = dev_get_platdata(&client->dev); + + if (!pdata) { + pdata = pca963x_dt_init(client, chip); + if (IS_ERR(pdata)) { + dev_warn(&client->dev, "could not parse configuration\n"); + pdata = NULL; + } + } + + if (pdata && (pdata->leds.num_leds < 1 || + pdata->leds.num_leds > chip->n_leds)) { + dev_err(&client->dev, "board info must claim 1-%d LEDs", + chip->n_leds); + return -EINVAL; + } + + pca963x_chip = devm_kzalloc(&client->dev, sizeof(*pca963x_chip), + GFP_KERNEL); + if (!pca963x_chip) + return -ENOMEM; + pca963x = devm_kzalloc(&client->dev, chip->n_leds * sizeof(*pca963x), + GFP_KERNEL); + if (!pca963x) + return -ENOMEM; + + i2c_set_clientdata(client, pca963x_chip); + + mutex_init(&pca963x_chip->mutex); + pca963x_chip->chipdef = chip; + pca963x_chip->client = client; + pca963x_chip->leds = pca963x; + + /* Turn off LEDs by default*/ + i2c_smbus_write_byte_data(client, chip->ledout_base, 0x00); + if (chip->n_leds > 4) + i2c_smbus_write_byte_data(client, chip->ledout_base + 1, 0x00); + + for (i = 0; i < chip->n_leds; i++) { + pca963x[i].led_num = i; + pca963x[i].chip = pca963x_chip; + + /* Platform data can specify LED names and default triggers */ + if (pdata && i < pdata->leds.num_leds) { + if (pdata->leds.leds[i].name) + snprintf(pca963x[i].name, + sizeof(pca963x[i].name), "pca963x:%s", + pdata->leds.leds[i].name); + if (pdata->leds.leds[i].default_trigger) + pca963x[i].led_cdev.default_trigger = + pdata->leds.leds[i].default_trigger; + } + if (!pdata || i >= pdata->leds.num_leds || + !pdata->leds.leds[i].name) + snprintf(pca963x[i].name, sizeof(pca963x[i].name), + "pca963x:%d:%.2x:%d", client->adapter->nr, + client->addr, i); + + pca963x[i].led_cdev.name = pca963x[i].name; 
+ pca963x[i].led_cdev.brightness_set = pca963x_led_set; + + if (pdata && pdata->blink_type == PCA963X_HW_BLINK) + pca963x[i].led_cdev.blink_set = pca963x_blink_set; + + INIT_WORK(&pca963x[i].work, pca963x_work); + + err = led_classdev_register(&client->dev, &pca963x[i].led_cdev); + if (err < 0) + goto exit; + } + + /* Disable LED all-call address and set normal mode */ + i2c_smbus_write_byte_data(client, PCA963X_MODE1, 0x00); + + /* Configure output: open-drain or totem pole (push-pull) */ + if (pdata && pdata->outdrv == PCA963X_OPEN_DRAIN) + i2c_smbus_write_byte_data(client, PCA963X_MODE2, 0x01); + + return 0; + +exit: + while (i--) { + led_classdev_unregister(&pca963x[i].led_cdev); + cancel_work_sync(&pca963x[i].work); + } + + return err; +} + +static int pca963x_remove(struct i2c_client *client) +{ + struct pca963x *pca963x = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pca963x->chipdef->n_leds; i++) { + led_classdev_unregister(&pca963x->leds[i].led_cdev); + cancel_work_sync(&pca963x->leds[i].work); + } + + return 0; +} + +static struct i2c_driver pca963x_driver = { + .driver = { + .name = "leds-pca963x", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(of_pca963x_match), + }, + .probe = pca963x_probe, + .remove = pca963x_remove, + .id_table = pca963x_id, +}; + +module_i2c_driver(pca963x_driver); + +MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>"); +MODULE_DESCRIPTION("PCA963X LED driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index faf52c005e8c..bb6f94898541 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -147,7 +147,7 @@ err: static int led_pwm_probe(struct platform_device *pdev) { - struct led_pwm_platform_data *pdata = pdev->dev.platform_data; + struct led_pwm_platform_data *pdata = dev_get_platdata(&pdev->dev); struct led_pwm_priv *priv; int i, ret = 0; diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c index 
4253a9b03dbf..358430db6e66 100644 --- a/drivers/leds/leds-regulator.c +++ b/drivers/leds/leds-regulator.c @@ -142,7 +142,8 @@ static void regulator_led_brightness_set(struct led_classdev *led_cdev, static int regulator_led_probe(struct platform_device *pdev) { - struct led_regulator_platform_data *pdata = pdev->dev.platform_data; + struct led_regulator_platform_data *pdata = + dev_get_platdata(&pdev->dev); struct regulator_led *led; struct regulator *vcc; int ret = 0; diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c index e1a0df63a37f..76483fb5ee45 100644 --- a/drivers/leds/leds-s3c24xx.c +++ b/drivers/leds/leds-s3c24xx.c @@ -71,7 +71,7 @@ static int s3c24xx_led_remove(struct platform_device *dev) static int s3c24xx_led_probe(struct platform_device *dev) { - struct s3c24xx_led_platdata *pdata = dev->dev.platform_data; + struct s3c24xx_led_platdata *pdata = dev_get_platdata(&dev->dev); struct s3c24xx_gpio_led *led; int ret; diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c index 64e204e714f6..5b8f938a8d73 100644 --- a/drivers/leds/leds-ss4200.c +++ b/drivers/leds/leds-ss4200.c @@ -91,7 +91,7 @@ MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection"); * detected as working, but in reality it is not) as low as * possible. 
*/ -static struct dmi_system_id __initdata nas_led_whitelist[] = { +static struct dmi_system_id nas_led_whitelist[] __initdata = { { .callback = ss4200_led_dmi_callback, .ident = "Intel SS4200-E", @@ -197,7 +197,7 @@ static void nasgpio_led_set_attr(struct led_classdev *led_cdev, spin_unlock(&nasgpio_gpio_lock); } -u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port) +static u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port) { struct nasgpio_led *led = led_classdev_to_nasgpio_led(led_cdev); u32 gpio_in; diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 98fe021ba276..8cc304f36728 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -737,7 +737,7 @@ static int tca6507_probe(struct i2c_client *client, int i = 0; adapter = to_i2c_adapter(client->dev.parent); - pdata = client->dev.platform_data; + pdata = dev_get_platdata(&client->dev); if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) return -EIO; diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c index 120815a42701..0a1a13f3a6a5 100644 --- a/drivers/leds/leds-wm831x-status.c +++ b/drivers/leds/leds-wm831x-status.c @@ -230,9 +230,9 @@ static int wm831x_status_probe(struct platform_device *pdev) int id = pdev->id % ARRAY_SIZE(chip_pdata->status); int ret; - res = platform_get_resource(pdev, IORESOURCE_IO, 0); + res = platform_get_resource(pdev, IORESOURCE_REG, 0); if (res == NULL) { - dev_err(&pdev->dev, "No I/O resource\n"); + dev_err(&pdev->dev, "No register resource\n"); ret = -EINVAL; goto err; } @@ -246,8 +246,8 @@ static int wm831x_status_probe(struct platform_device *pdev) drvdata->wm831x = wm831x; drvdata->reg = res->start; - if (wm831x->dev->platform_data) - chip_pdata = wm831x->dev->platform_data; + if (dev_get_platdata(wm831x->dev)) + chip_pdata = dev_get_platdata(wm831x->dev); else chip_pdata = NULL; diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c index 
8a181d56602d..3f75fd22fd49 100644 --- a/drivers/leds/leds-wm8350.c +++ b/drivers/leds/leds-wm8350.c @@ -203,7 +203,7 @@ static int wm8350_led_probe(struct platform_device *pdev) { struct regulator *isink, *dcdc; struct wm8350_led *led; - struct wm8350_led_platform_data *pdata = pdev->dev.platform_data; + struct wm8350_led_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; if (pdata == NULL) { diff --git a/drivers/leds/trigger/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c index 3c9c88a07eb8..47e55aa9eefa 100644 --- a/drivers/leds/trigger/ledtrig-backlight.c +++ b/drivers/leds/trigger/ledtrig-backlight.c @@ -36,26 +36,28 @@ static int fb_notifier_callback(struct notifier_block *p, struct bl_trig_notifier, notifier); struct led_classdev *led = n->led; struct fb_event *fb_event = data; - int *blank = fb_event->data; - int new_status = *blank ? BLANK : UNBLANK; + int *blank; + int new_status; - switch (event) { - case FB_EVENT_BLANK: - if (new_status == n->old_status) - break; + /* If we aren't interested in this event, skip it immediately ... */ + if (event != FB_EVENT_BLANK) + return 0; - if ((n->old_status == UNBLANK) ^ n->invert) { - n->brightness = led->brightness; - __led_set_brightness(led, LED_OFF); - } else { - __led_set_brightness(led, n->brightness); - } + blank = fb_event->data; + new_status = *blank ? 
BLANK : UNBLANK; - n->old_status = new_status; + if (new_status == n->old_status) + return 0; - break; + if ((n->old_status == UNBLANK) ^ n->invert) { + n->brightness = led->brightness; + __led_set_brightness(led, LED_OFF); + } else { + __led_set_brightness(led, n->brightness); } + n->old_status = new_status; + return 0; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ee372884c405..f9764e61978b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order) return 0; } -static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long bch_mca_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct cache_set *c = container_of(shrink, struct cache_set, shrink); struct btree *b, *t; unsigned long i, nr = sc->nr_to_scan; + unsigned long freed = 0; if (c->shrinker_disabled) - return 0; + return SHRINK_STOP; if (c->try_harder) - return 0; - - /* - * If nr == 0, we're supposed to return the number of items we have - * cached. Not allowed to return -1. 
- */ - if (!nr) - return mca_can_free(c) * c->btree_pages; + return SHRINK_STOP; /* Return -1 if we can't do anything right now */ if (sc->gfp_mask & __GFP_WAIT) @@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) i = 0; list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { - if (!nr) + if (freed >= nr) break; if (++i > 3 && !mca_reap(b, NULL, 0)) { mca_data_free(b); rw_unlock(true, b); - --nr; + freed++; } } @@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) if (list_empty(&c->btree_cache)) goto out; - for (i = 0; nr && i < c->bucket_cache_used; i++) { + for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { b = list_first_entry(&c->btree_cache, struct btree, list); list_rotate_left(&c->btree_cache); @@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc) mca_bucket_free(b); mca_data_free(b); rw_unlock(true, b); - --nr; + freed++; } else b->accessed = 0; } out: - nr = mca_can_free(c) * c->btree_pages; mutex_unlock(&c->bucket_lock); - return nr; + return freed; +} + +static unsigned long bch_mca_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct cache_set *c = container_of(shrink, struct cache_set, shrink); + + if (c->shrinker_disabled) + return 0; + + if (c->try_harder) + return 0; + + return mca_can_free(c) * c->btree_pages; } void bch_btree_cache_free(struct cache_set *c) @@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c) c->verify_data = NULL; #endif - c->shrink.shrink = bch_mca_shrink; + c->shrink.count_objects = bch_mca_count; + c->shrink.scan_objects = bch_mca_scan; c->shrink.seeks = 4; c->shrink.batch = c->btree_pages * 2; register_shrinker(&c->shrink); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 12a2c2846f99..4fe6ab2fbe2e 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -556,7 +556,7 @@ STORE(__bch_cache_set) struct 
shrink_control sc; sc.gfp_mask = GFP_KERNEL; sc.nr_to_scan = strtoul_or_return(buf); - c->shrink.shrink(&c->shrink, &sc); + c->shrink.scan_objects(&c->shrink, &sc); } sysfs_strtoul(congested_read_threshold_us, diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5227e079a6e3..173cbb20d104 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, unsigned long max_jiffies) { if (jiffies - b->last_accessed < max_jiffies) - return 1; + return 0; if (!(gfp & __GFP_IO)) { if (test_bit(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) - return 1; + return 0; } if (b->hold_count) - return 1; + return 0; __make_buffer_clean(b); __unlink_buffer(b); __free_buffer_wake(b); - return 0; + return 1; } -static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - struct shrink_control *sc) +static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, + gfp_t gfp_mask) { int l; struct dm_buffer *b, *tmp; + long freed = 0; for (l = 0; l < LIST_SIZE; l++) { - list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) - if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && - !--nr_to_scan) - return; + list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { + freed += __cleanup_old_buffer(b, gfp_mask, 0); + if (!--nr_to_scan) + break; + } dm_bufio_cond_resched(); } + return freed; } -static int shrink(struct shrinker *shrinker, struct shrink_control *sc) +static unsigned long +dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct dm_bufio_client *c = - container_of(shrinker, struct dm_bufio_client, shrinker); - unsigned long r; - unsigned long nr_to_scan = sc->nr_to_scan; + struct dm_bufio_client *c; + unsigned long freed; + c = container_of(shrink, struct dm_bufio_client, shrinker); if (sc->gfp_mask & __GFP_IO) dm_bufio_lock(c); else if (!dm_bufio_trylock(c)) - return !nr_to_scan ? 
0 : -1; + return SHRINK_STOP; - if (nr_to_scan) - __scan(c, nr_to_scan, sc); + freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); + dm_bufio_unlock(c); + return freed; +} - r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; - if (r > INT_MAX) - r = INT_MAX; +static unsigned long +dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct dm_bufio_client *c; + unsigned long count; - dm_bufio_unlock(c); + c = container_of(shrink, struct dm_bufio_client, shrinker); + if (sc->gfp_mask & __GFP_IO) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) + return 0; - return r; + count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; + dm_bufio_unlock(c); + return count; } /* @@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); - c->shrinker.shrink = shrink; + c->shrinker.count_objects = dm_bufio_shrink_count; + c->shrinker.scan_objects = dm_bufio_shrink_scan; c->shrinker.seeks = 1; c->shrinker.batch = 0; register_shrinker(&c->shrinker); @@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void) struct dm_buffer *b; b = list_entry(c->lru[LIST_CLEAN].prev, struct dm_buffer, lru_list); - if (__cleanup_old_buffer(b, 0, max_age * HZ)) + if (!__cleanup_old_buffer(b, 0, max_age * HZ)) break; dm_bufio_cond_resched(); } diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 8068d7b64155..c7caf94621b4 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -203,7 +203,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC config VIDEO_SH_VEU tristate "SuperH VEU mem2mem video processing driver" - depends on VIDEO_DEV && VIDEO_V4L2 && GENERIC_HARDIRQS && HAS_DMA + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV help diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig index 39882ddd2594..6ecdc39bb366 100644 --- 
a/drivers/media/radio/Kconfig +++ b/drivers/media/radio/Kconfig @@ -214,7 +214,7 @@ config RADIO_TIMBERDALE config RADIO_WL1273 tristate "Texas Instruments WL1273 I2C FM Radio" - depends on I2C && VIDEO_V4L2 && GENERIC_HARDIRQS + depends on I2C && VIDEO_V4L2 select MFD_CORE select MFD_WL1273_CORE select FW_LOADER diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index e0e46f50f95d..914c3d142f78 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -23,7 +23,7 @@ config MFD_AS3711 select MFD_CORE select REGMAP_I2C select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help Support for the AS3711 PMIC from AMS @@ -40,7 +40,7 @@ config PMIC_ADP5520 config MFD_AAT2870_CORE bool "AnalogicTech AAT2870" select MFD_CORE - depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS + depends on I2C=y && GPIOLIB help If you say yes here you get support for the AAT2870. This driver provides common support for accessing the device, @@ -78,7 +78,7 @@ config MFD_CROS_EC_SPI config MFD_ASIC3 bool "Compaq ASIC3" - depends on GENERIC_HARDIRQS && GPIOLIB && ARM + depends on GPIOLIB && ARM select MFD_CORE ---help--- This driver supports the ASIC3 multifunction chip found on many @@ -104,7 +104,7 @@ config MFD_DA9052_SPI select REGMAP_SPI select REGMAP_IRQ select PMIC_DA9052 - depends on SPI_MASTER=y && GENERIC_HARDIRQS + depends on SPI_MASTER=y help Support for the Dialog Semiconductor DA9052 PMIC when controlled using SPI. This driver provides common support @@ -116,7 +116,7 @@ config MFD_DA9052_I2C select REGMAP_I2C select REGMAP_IRQ select PMIC_DA9052 - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help Support for the Dialog Semiconductor DA9052 PMIC when controlled using I2C. This driver provides common support @@ -128,7 +128,7 @@ config MFD_DA9055 select REGMAP_I2C select REGMAP_IRQ select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help Say yes here for support of Dialog Semiconductor DA9055. This is a Power Management IC. 
This driver provides common support for @@ -144,7 +144,7 @@ config MFD_DA9063 select MFD_CORE select REGMAP_I2C select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help Say yes here for support for the Dialog Semiconductor DA9063 PMIC. This includes the I2C driver and core APIs. @@ -156,7 +156,7 @@ config MFD_MC13783 config MFD_MC13XXX tristate - depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS + depends on (SPI_MASTER || I2C) select MFD_CORE select MFD_MC13783 help @@ -167,7 +167,7 @@ config MFD_MC13XXX config MFD_MC13XXX_SPI tristate "Freescale MC13783 and MC13892 SPI interface" - depends on SPI_MASTER && GENERIC_HARDIRQS + depends on SPI_MASTER select REGMAP_SPI select MFD_MC13XXX help @@ -175,7 +175,7 @@ config MFD_MC13XXX_SPI config MFD_MC13XXX_I2C tristate "Freescale MC13892 I2C interface" - depends on I2C && GENERIC_HARDIRQS + depends on I2C select REGMAP_I2C select MFD_MC13XXX help @@ -183,7 +183,7 @@ config MFD_MC13XXX_I2C config HTC_EGPIO bool "HTC EGPIO support" - depends on GENERIC_HARDIRQS && GPIOLIB && ARM + depends on GPIOLIB && ARM help This driver supports the CPLD egpio chip present on several HTC phones. 
It provides basic support for input @@ -192,7 +192,6 @@ config HTC_EGPIO config HTC_PASIC3 tristate "HTC PASIC3 LED/DS1WM chip support" select MFD_CORE - depends on GENERIC_HARDIRQS help This core driver provides register access for the LED/DS1WM chips labeled "AIC2" and "AIC3", found on HTC Blueangel and @@ -210,7 +209,7 @@ config HTC_I2CPLD config LPC_ICH tristate "Intel ICH LPC" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE help The LPC bridge function of the Intel ICH provides support for @@ -220,7 +219,7 @@ config LPC_ICH config LPC_SCH tristate "Intel SCH LPC" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE help LPC bridge function of the Intel SCH provides support for @@ -238,7 +237,7 @@ config MFD_INTEL_MSIC config MFD_JANZ_CMODIO tristate "Janz CMOD-IO PCI MODULbus Carrier Board" select MFD_CORE - depends on PCI && GENERIC_HARDIRQS + depends on PCI help This is the core driver for the Janz CMOD-IO PCI MODULbus carrier board. This device is a PCI to MODULbus bridge which may @@ -277,7 +276,7 @@ config MFD_KEMPLD config MFD_88PM800 tristate "Marvell 88PM800" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select REGMAP_I2C select REGMAP_IRQ select MFD_CORE @@ -289,7 +288,7 @@ config MFD_88PM800 config MFD_88PM805 tristate "Marvell 88PM805" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select REGMAP_I2C select REGMAP_IRQ select MFD_CORE @@ -301,7 +300,7 @@ config MFD_88PM805 config MFD_88PM860X bool "Marvell 88PM8606/88PM8607" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select REGMAP_I2C select MFD_CORE help @@ -312,7 +311,7 @@ config MFD_88PM860X config MFD_MAX77686 bool "Maxim Semiconductor MAX77686 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select IRQ_DOMAIN @@ -325,7 +324,7 @@ config MFD_MAX77686 config MFD_MAX77693 bool "Maxim Semiconductor MAX77693 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS + depends 
on I2C=y select MFD_CORE select REGMAP_I2C help @@ -339,7 +338,7 @@ config MFD_MAX77693 config MFD_MAX8907 tristate "Maxim Semiconductor MAX8907 PMIC Support" select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select REGMAP_I2C select REGMAP_IRQ help @@ -350,7 +349,7 @@ config MFD_MAX8907 config MFD_MAX8925 bool "Maxim Semiconductor MAX8925 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE help Say yes here to support for Maxim Semiconductor MAX8925. This is @@ -360,7 +359,7 @@ config MFD_MAX8925 config MFD_MAX8997 bool "Maxim Semiconductor MAX8997/8966 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select IRQ_DOMAIN help @@ -373,7 +372,7 @@ config MFD_MAX8997 config MFD_MAX8998 bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select IRQ_DOMAIN help @@ -385,7 +384,7 @@ config MFD_MAX8998 config EZX_PCAP bool "Motorola EZXPCAP Support" - depends on GENERIC_HARDIRQS && SPI_MASTER + depends on SPI_MASTER help This enables the PCAP ASIC present on EZX Phones. This is needed for MMC, TouchScreen, Sound, USB, etc.. 
@@ -393,7 +392,7 @@ config EZX_PCAP config MFD_VIPERBOARD tristate "Nano River Technologies Viperboard" select MFD_CORE - depends on USB && GENERIC_HARDIRQS + depends on USB default n help Say yes here if you want support for Nano River Technologies @@ -407,7 +406,7 @@ config MFD_VIPERBOARD config MFD_RETU tristate "Nokia Retu and Tahvo multi-function device" select MFD_CORE - depends on I2C && GENERIC_HARDIRQS + depends on I2C select REGMAP_IRQ help Retu and Tahvo are a multi-function devices found on Nokia @@ -480,7 +479,7 @@ config MFD_PM8XXX_IRQ config MFD_RDC321X tristate "RDC R-321x southbridge" select MFD_CORE - depends on PCI && GENERIC_HARDIRQS + depends on PCI help Say yes here if you want to have support for the RDC R-321x SoC southbridge which provides access to GPIOs and Watchdog using the @@ -488,7 +487,7 @@ config MFD_RDC321X config MFD_RTSX_PCI tristate "Realtek PCI-E card reader" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE help This supports for Realtek PCI-Express card reader including rts5209, @@ -498,7 +497,7 @@ config MFD_RTSX_PCI config MFD_RC5T583 bool "Ricoh RC5T583 Power Management system device" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C help @@ -512,7 +511,7 @@ config MFD_RC5T583 config MFD_SEC_CORE bool "SAMSUNG Electronics PMIC Series Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -555,7 +554,7 @@ config MFD_SM501_GPIO config MFD_SMSC bool "SMSC ECE1099 series chips" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C help @@ -577,7 +576,7 @@ config ABX500_CORE config AB3100_CORE bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions" - depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS + depends on I2C=y && ABX500_CORE select MFD_CORE default y if ARCH_U300 help @@ -601,7 +600,7 @@ config AB3100_OTP config AB8500_CORE bool "ST-Ericsson AB8500 
Mixed Signal Power Management chip" - depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU + depends on ABX500_CORE && MFD_DB8500_PRCMU select POWER_SUPPLY select MFD_CORE select IRQ_DOMAIN @@ -639,7 +638,7 @@ config MFD_DB8500_PRCMU config MFD_STMPE bool "STMicroelectronics STMPE" - depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS + depends on (I2C=y || SPI_MASTER=y) select MFD_CORE help Support for the STMPE family of I/O Expanders from @@ -680,7 +679,7 @@ endmenu config MFD_STA2X11 bool "STMicroelectronics STA2X11" - depends on STA2X11 && GENERIC_HARDIRQS + depends on STA2X11 select MFD_CORE select REGMAP_MMIO @@ -700,7 +699,6 @@ config MFD_TI_AM335X_TSCADC select MFD_CORE select REGMAP select REGMAP_MMIO - depends on GENERIC_HARDIRQS help If you say yes here you get support for Texas Instruments series of Touch Screen /ADC chips. @@ -717,7 +715,7 @@ config MFD_DM355EVM_MSP config MFD_LP8788 bool "TI LP8788 Power Management Unit Driver" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select IRQ_DOMAIN @@ -739,14 +737,14 @@ config MFD_PALMAS select MFD_CORE select REGMAP_I2C select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help If you say yes here you get support for the Palmas series of PMIC chips from Texas Instruments. config MFD_TI_SSP tristate "TI Sequencer Serial Port support" - depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS + depends on ARCH_DAVINCI_TNETV107X select MFD_CORE ---help--- Say Y here if you want support for the Sequencer Serial Port @@ -761,7 +759,6 @@ config TPS6105X select REGULATOR select MFD_CORE select REGULATOR_FIXED_VOLTAGE - depends on GENERIC_HARDIRQS help This option enables a driver for the TP61050/TPS61052 high-power "white LED driver". 
This boost converter is @@ -784,7 +781,7 @@ config TPS65010 config TPS6507X tristate "TI TPS6507x Power Management / Touch Screen chips" select MFD_CORE - depends on I2C && GENERIC_HARDIRQS + depends on I2C help If you say yes here you get support for the TPS6507x series of Power Management / Touch Screen chips. These include voltage @@ -798,7 +795,7 @@ config TPS65911_COMPARATOR config MFD_TPS65090 bool "TI TPS65090 Power Management chips" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -811,7 +808,7 @@ config MFD_TPS65090 config MFD_TPS65217 tristate "TI TPS65217 Power Management / White LED chips" - depends on I2C && GENERIC_HARDIRQS + depends on I2C select MFD_CORE select REGMAP_I2C help @@ -826,7 +823,7 @@ config MFD_TPS65217 config MFD_TPS6586X bool "TI TPS6586x Power Management chips" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C help @@ -841,7 +838,7 @@ config MFD_TPS6586X config MFD_TPS65910 bool "TI TPS65910 Power Management chip" - depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS + depends on I2C=y && GPIOLIB select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -862,7 +859,7 @@ config MFD_TPS65912_I2C bool "TI TPS65912 Power Management chip with I2C" select MFD_CORE select MFD_TPS65912 - depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS + depends on I2C=y && GPIOLIB help If you say yes here you get support for the TPS65912 series of PM chips with I2C interface. @@ -871,14 +868,14 @@ config MFD_TPS65912_SPI bool "TI TPS65912 Power Management chip with SPI" select MFD_CORE select MFD_TPS65912 - depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS + depends on SPI_MASTER && GPIOLIB help If you say yes here you get support for the TPS65912 series of PM chips with SPI interface. 
config MFD_TPS80031 bool "TI TPS80031/TPS80032 Power Management chips" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -892,7 +889,7 @@ config MFD_TPS80031 config TWL4030_CORE bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select IRQ_DOMAIN select REGMAP_I2C help @@ -931,13 +928,13 @@ config TWL4030_POWER config MFD_TWL4030_AUDIO bool "TI TWL4030 Audio" - depends on TWL4030_CORE && GENERIC_HARDIRQS + depends on TWL4030_CORE select MFD_CORE default n config TWL6040_CORE bool "TI TWL6040 audio codec" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -961,7 +958,7 @@ config MENELAUS config MFD_WL1273_CORE tristate "TI WL1273 FM radio" - depends on I2C && GENERIC_HARDIRQS + depends on I2C select MFD_CORE default n help @@ -974,7 +971,6 @@ config MFD_LM3533 depends on I2C select MFD_CORE select REGMAP_I2C - depends on GENERIC_HARDIRQS help Say yes here to enable support for National Semiconductor / TI LM3533 Lighting Power chips. @@ -996,7 +992,7 @@ config MFD_TIMBERDALE config MFD_TC3589X bool "Toshiba TC35892 and variants" - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_CORE help Support for the Toshiba TC35892 and variants I/O Expander. 
@@ -1011,7 +1007,7 @@ config MFD_TMIO config MFD_T7L66XB bool "Toshiba T7L66XB" - depends on ARM && HAVE_CLK && GENERIC_HARDIRQS + depends on ARM && HAVE_CLK select MFD_CORE select MFD_TMIO help @@ -1036,7 +1032,7 @@ config MFD_TC6393XB config MFD_VX855 tristate "VIA VX855/VX875 integrated south bridge" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select MFD_CORE help Say yes here to enable support for various functions of the @@ -1054,7 +1050,7 @@ config MFD_ARIZONA_I2C select MFD_ARIZONA select MFD_CORE select REGMAP_I2C - depends on I2C && GENERIC_HARDIRQS + depends on I2C help Support for the Wolfson Microelectronics Arizona platform audio SoC core functionality controlled via I2C. @@ -1064,7 +1060,7 @@ config MFD_ARIZONA_SPI select MFD_ARIZONA select MFD_CORE select REGMAP_SPI - depends on SPI_MASTER && GENERIC_HARDIRQS + depends on SPI_MASTER help Support for the Wolfson Microelectronics Arizona platform audio SoC core functionality controlled via I2C. @@ -1090,7 +1086,7 @@ config MFD_WM8997 config MFD_WM8400 bool "Wolfson Microelectronics WM8400" select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select REGMAP_I2C help Support for the Wolfson Microelecronics WM8400 PMIC and audio @@ -1100,7 +1096,6 @@ config MFD_WM8400 config MFD_WM831X bool - depends on GENERIC_HARDIRQS config MFD_WM831X_I2C bool "Wolfson Microelectronics WM831x/2x PMICs with I2C" @@ -1108,7 +1103,7 @@ config MFD_WM831X_I2C select MFD_WM831X select REGMAP_I2C select IRQ_DOMAIN - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help Support for the Wolfson Microelecronics WM831x and WM832x PMICs when controlled using I2C. This driver provides common support @@ -1121,7 +1116,7 @@ config MFD_WM831X_SPI select MFD_WM831X select REGMAP_SPI select IRQ_DOMAIN - depends on SPI_MASTER && GENERIC_HARDIRQS + depends on SPI_MASTER help Support for the Wolfson Microelecronics WM831x and WM832x PMICs when controlled using SPI. 
This driver provides common support @@ -1130,12 +1125,11 @@ config MFD_WM831X_SPI config MFD_WM8350 bool - depends on GENERIC_HARDIRQS config MFD_WM8350_I2C bool "Wolfson Microelectronics WM8350 with I2C" select MFD_WM8350 - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help The WM8350 is an integrated audio and power management subsystem with watchdog and RTC functionality for embedded @@ -1148,7 +1142,7 @@ config MFD_WM8994 select MFD_CORE select REGMAP_I2C select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y help The WM8994 is a highly integrated hi-fi CODEC designed for smartphone applicatiosn. As well as audio functionality it diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig index 5acb9c5b49c4..22429b8b1068 100644 --- a/drivers/misc/cb710/Kconfig +++ b/drivers/misc/cb710/Kconfig @@ -1,6 +1,6 @@ config CB710_CORE tristate "ENE CB710/720 Flash memory card reader support" - depends on PCI && GENERIC_HARDIRQS + depends on PCI help This option enables support for PCI ENE CB710/720 Flash memory card reader found in some laptops (ie. some versions of HP Compaq nx9500). 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index b7fd5ab80a48..7fc5099e44b2 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -487,7 +487,7 @@ config MMC_SDHI config MMC_CB710 tristate "ENE CB710 MMC/SD Interface support" - depends on PCI && GENERIC_HARDIRQS + depends on PCI select CB710_CORE help This option enables support for MMC/SD part of ENE CB710/720 Flash diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 154275182b4b..f5aa4b02cfa6 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1343,7 +1343,7 @@ out: static int invalidate_fastmap(struct ubi_device *ubi, struct ubi_fastmap_layout *fm) { - int ret, i; + int ret; struct ubi_vid_hdr *vh; ret = erase_block(ubi, fm->e[0]->pnum); @@ -1360,9 +1360,6 @@ static int invalidate_fastmap(struct ubi_device *ubi, vh->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ret = ubi_io_write_vid_hdr(ubi, fm->e[0]->pnum, vh); - for (i = 0; i < fm->used_blocks; i++) - ubi_wl_put_fm_peb(ubi, fm->e[i], i, fm->to_be_tortured[i]); - return ret; } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 5df49d3cb5c7..c95bfb183c62 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1069,6 +1069,9 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { dbg_wl("no WL needed: min used EC %d, max free EC %d", e1->ec, e2->ec); + + /* Give the unused PEB back */ + wl_tree_add(e2, &ubi->free); goto out_cancel; } self_check_in_wl_tree(ubi, e1, &ubi->used); diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 3d86ffeb4e15..94edc9c6fbbf 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -725,6 +725,7 @@ static irqreturn_t lance_dma_merr_int(int irq, void *dev_id) { struct net_device *dev = dev_id; + clear_ioasic_dma_irq(irq); printk(KERN_ERR "%s: DMA error\n", dev->name); return IRQ_HANDLED; } 
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 8030cc0396fd..751d5c7b312d 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -22,7 +22,7 @@ if NET_CADENCE config ARM_AT91_ETHER tristate "AT91RM9200 Ethernet support" - depends on GENERIC_HARDIRQS && HAS_DMA + depends on HAS_DMA select MACB ---help--- If you wish to compile a kernel for the AT91RM9200 and enable diff --git a/drivers/net/wireless/p54/Kconfig b/drivers/net/wireless/p54/Kconfig index 15ea36b51a66..cdafb8c73e82 100644 --- a/drivers/net/wireless/p54/Kconfig +++ b/drivers/net/wireless/p54/Kconfig @@ -41,7 +41,7 @@ config P54_PCI config P54_SPI tristate "Prism54 SPI (stlc45xx) support" - depends on P54_COMMON && SPI_MASTER && GENERIC_HARDIRQS + depends on P54_COMMON && SPI_MASTER ---help--- This driver is for stlc4550 or stlc4560 based wireless chips such as Nokia's N800/N810 Portable Internet Tablet. diff --git a/drivers/net/wireless/ti/wl1251/Kconfig b/drivers/net/wireless/ti/wl1251/Kconfig index 8fec4ed36ac2..477a206c098e 100644 --- a/drivers/net/wireless/ti/wl1251/Kconfig +++ b/drivers/net/wireless/ti/wl1251/Kconfig @@ -1,6 +1,6 @@ menuconfig WL1251 tristate "TI wl1251 driver support" - depends on MAC80211 && GENERIC_HARDIRQS + depends on MAC80211 select FW_LOADER select CRC7 ---help--- diff --git a/drivers/net/wireless/ti/wlcore/Kconfig b/drivers/net/wireless/ti/wlcore/Kconfig index 2b832825c3d4..7c099542b214 100644 --- a/drivers/net/wireless/ti/wlcore/Kconfig +++ b/drivers/net/wireless/ti/wlcore/Kconfig @@ -1,6 +1,6 @@ config WLCORE tristate "TI wlcore support" - depends on WL_TI && GENERIC_HARDIRQS && MAC80211 + depends on WL_TI && MAC80211 select FW_LOADER ---help--- This module contains the main code for TI WLAN chips. 
It abstracts diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index f6488adf3af1..0b7d23b4ad95 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -487,7 +487,6 @@ static void acpiphp_bus_add(acpi_handle handle) { struct acpi_device *adev = NULL; - acpiphp_bus_trim(handle); acpi_bus_scan(handle); acpi_bus_get_device(handle, &adev); if (adev) @@ -529,6 +528,16 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev) } } +static int acpiphp_rescan_slot(struct acpiphp_slot *slot) +{ + struct acpiphp_func *func; + + list_for_each_entry(func, &slot->funcs, sibling) + acpiphp_bus_add(func_to_handle(func)); + + return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0)); +} + /** * enable_slot - enable, configure a slot * @slot: slot to be enabled @@ -543,12 +552,9 @@ static void __ref enable_slot(struct acpiphp_slot *slot) struct acpiphp_func *func; int max, pass; LIST_HEAD(add_list); + int nr_found; - list_for_each_entry(func, &slot->funcs, sibling) - acpiphp_bus_add(func_to_handle(func)); - - pci_scan_slot(bus, PCI_DEVFN(slot->device, 0)); - + nr_found = acpiphp_rescan_slot(slot); max = acpiphp_max_busnr(bus); for (pass = 0; pass < 2; pass++) { list_for_each_entry(dev, &bus->devices, bus_list) { @@ -567,8 +573,11 @@ static void __ref enable_slot(struct acpiphp_slot *slot) } } } - __pci_bus_assign_resources(bus, &add_list, NULL); + /* Nothing more to do here if there are no new devices on this bus. 
*/ + if (!nr_found && (slot->flags & SLOT_ENABLED)) + return; + acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); acpiphp_set_acpi_region(slot); @@ -837,11 +846,22 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) case ACPI_NOTIFY_DEVICE_CHECK: /* device check */ dbg("%s: Device check notify on %s\n", __func__, objname); - if (bridge) + if (bridge) { acpiphp_check_bridge(bridge); - else - acpiphp_check_bridge(func->parent); + } else { + struct acpiphp_slot *slot = func->slot; + int ret; + /* + * Check if anything has changed in the slot and rescan + * from the parent if that's the case. + */ + mutex_lock(&slot->crit_sect); + ret = acpiphp_rescan_slot(slot); + mutex_unlock(&slot->crit_sect); + if (ret) + acpiphp_check_bridge(func->parent); + } break; case ACPI_NOTIFY_EJECT_REQUEST: @@ -867,6 +887,8 @@ static void hotplug_event_work(struct work_struct *work) hotplug_event(hp_work->handle, hp_work->type, context); acpi_scan_lock_release(); + acpi_evaluate_hotplug_ost(hp_work->handle, hp_work->type, + ACPI_OST_SC_SUCCESS, NULL); kfree(hp_work); /* allocated in handle_hotplug_event() */ put_bridge(context->func.parent); } @@ -882,11 +904,15 @@ static void hotplug_event_work(struct work_struct *work) static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) { struct acpiphp_context *context; + u32 ost_code = ACPI_OST_SC_SUCCESS; switch (type) { case ACPI_NOTIFY_BUS_CHECK: case ACPI_NOTIFY_DEVICE_CHECK: + break; case ACPI_NOTIFY_EJECT_REQUEST: + ost_code = ACPI_OST_SC_EJECT_IN_PROGRESS; + acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL); break; case ACPI_NOTIFY_DEVICE_WAKE: @@ -895,20 +921,21 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); - return; + goto out; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode 
mismatch\n"); - return; + goto out; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); - return; + goto out; default: acpi_handle_warn(handle, "Unsupported event type 0x%x\n", type); - return; + ost_code = ACPI_OST_SC_UNRECOGNIZED_NOTIFY; + goto out; } mutex_lock(&acpiphp_context_lock); @@ -917,8 +944,14 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) get_bridge(context->func.parent); acpiphp_put_context(context); alloc_acpi_hp_work(handle, type, context, hotplug_event_work); + mutex_unlock(&acpiphp_context_lock); + return; } mutex_unlock(&acpiphp_context_lock); + ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; + + out: + acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL); } /* diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index b35f93c232cf..d5f90d6383bc 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -30,7 +30,6 @@ static int pci_msi_enable = 1; /* Arch hooks */ -#if defined(CONFIG_GENERIC_HARDIRQS) int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_chip *chip = dev->bus->msi; @@ -67,21 +66,6 @@ int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type) return chip->check_device(chip, dev, nvec, type); } -#else -int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - return -ENOSYS; -} - -void __weak arch_teardown_msi_irq(unsigned int irq) -{ -} - -int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type) -{ - return 0; -} -#endif /* CONFIG_GENERIC_HARDIRQS */ int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { @@ -245,8 +229,6 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) desc->masked = __msix_mask_irq(desc, flag); } -#ifdef CONFIG_GENERIC_HARDIRQS - static void msi_set_mask_bit(struct irq_data *data, u32 flag) { struct msi_desc *desc = irq_data_get_msi(data); @@ -270,8 +252,6 @@ void unmask_msi_irq(struct irq_data *data) msi_set_mask_bit(data, 0); 
} -#endif /* CONFIG_GENERIC_HARDIRQS */ - void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { BUG_ON(entry->dev->current_state != PCI_D0); @@ -382,10 +362,8 @@ static void free_msi_irqs(struct pci_dev *dev) nvec = entry->nvec_used; else nvec = 1 << entry->msi_attrib.multiple; -#ifdef CONFIG_GENERIC_HARDIRQS for (i = 0; i < nvec; i++) BUG_ON(irq_has_action(entry->irq + i)); -#endif } arch_teardown_msi_irqs(dev); diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 36a9e6023395..96d6b2eef4f2 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -732,6 +732,7 @@ config SAMSUNG_LAPTOP tristate "Samsung Laptop driver" depends on X86 depends on RFKILL || RFKILL = n + depends on ACPI_VIDEO || ACPI_VIDEO = n depends on BACKLIGHT_CLASS_DEVICE select LEDS_CLASS select NEW_LEDS @@ -764,7 +765,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" - depends on SERIO_I8042 + depends on ACPI select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 diff --git a/drivers/platform/x86/amilo-rfkill.c b/drivers/platform/x86/amilo-rfkill.c index 6296f078b7bc..da36b5e824d4 100644 --- a/drivers/platform/x86/amilo-rfkill.c +++ b/drivers/platform/x86/amilo-rfkill.c @@ -85,6 +85,13 @@ static const struct dmi_system_id amilo_rfkill_id_table[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_BOARD_NAME, "AMILO L1310"), + }, + .driver_data = (void *)&amilo_a1655_rfkill_ops + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_BOARD_NAME, "AMILO M7440"), }, .driver_data = (void *)&amilo_m7440_rfkill_ops diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c index 36e5e6c13db4..6dfa8d3b4eec 100644 --- a/drivers/platform/x86/classmate-laptop.c +++ b/drivers/platform/x86/classmate-laptop.c @@ -590,7 +590,7 @@ static ssize_t cmpc_accel_sensitivity_store(struct 
device *dev, inputdev = dev_get_drvdata(&acpi->dev); accel = dev_get_drvdata(&inputdev->dev); - r = strict_strtoul(buf, 0, &sensitivity); + r = kstrtoul(buf, 0, &sensitivity); if (r) return r; diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 475cc5242511..eaa78edb1f4e 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -425,7 +425,8 @@ static ssize_t pwm_enable_store(struct device *dev, struct compal_data *data = dev_get_drvdata(dev); long val; int err; - err = strict_strtol(buf, 10, &val); + + err = kstrtol(buf, 10, &val); if (err) return err; if (val < 0) @@ -463,7 +464,8 @@ static ssize_t pwm_store(struct device *dev, struct device_attribute *attr, struct compal_data *data = dev_get_drvdata(dev); long val; int err; - err = strict_strtol(buf, 10, &val); + + err = kstrtol(buf, 10, &val); if (err) return err; if (val < 0 || val > 255) @@ -1081,7 +1083,6 @@ static int compal_remove(struct platform_device *pdev) hwmon_device_unregister(data->hwmon_dev); power_supply_unregister(&data->psy); - platform_set_drvdata(pdev, NULL); kfree(data); sysfs_remove_group(&pdev->dev.kobj, &compal_attribute_group); diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index d6970f47ae72..1c86fa0857c8 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -725,7 +725,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) (void *) HPWMI_WWAN); if (!wwan_rfkill) { err = -ENOMEM; - goto register_gps_error; + goto register_bluetooth_error; } rfkill_init_sw_state(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN)); @@ -733,7 +733,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) hp_wmi_get_hw_state(HPWMI_WWAN)); err = rfkill_register(wwan_rfkill); if (err) - goto register_wwan_err; + goto register_wwan_error; } if (wireless & 0x8) { @@ -743,7 +743,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) (void *) 
HPWMI_GPS); if (!gps_rfkill) { err = -ENOMEM; - goto register_bluetooth_error; + goto register_wwan_error; } rfkill_init_sw_state(gps_rfkill, hp_wmi_get_sw_state(HPWMI_GPS)); @@ -755,16 +755,16 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) } return 0; -register_wwan_err: - rfkill_destroy(wwan_rfkill); - wwan_rfkill = NULL; - if (gps_rfkill) - rfkill_unregister(gps_rfkill); register_gps_error: rfkill_destroy(gps_rfkill); gps_rfkill = NULL; if (bluetooth_rfkill) rfkill_unregister(bluetooth_rfkill); +register_wwan_error: + rfkill_destroy(wwan_rfkill); + wwan_rfkill = NULL; + if (gps_rfkill) + rfkill_unregister(gps_rfkill); register_bluetooth_error: rfkill_destroy(bluetooth_rfkill); bluetooth_rfkill = NULL; diff --git a/drivers/platform/x86/intel-rst.c b/drivers/platform/x86/intel-rst.c index 9385afd9b558..41b740cb28bc 100644 --- a/drivers/platform/x86/intel-rst.c +++ b/drivers/platform/x86/intel-rst.c @@ -193,17 +193,6 @@ static struct acpi_driver irst_driver = { }, }; -static int irst_init(void) -{ - return acpi_bus_register_driver(&irst_driver); -} - -static void irst_exit(void) -{ - acpi_bus_unregister_driver(&irst_driver); -} - -module_init(irst_init); -module_exit(irst_exit); +module_acpi_driver(irst_driver); MODULE_DEVICE_TABLE(acpi, irst_ids); diff --git a/drivers/platform/x86/intel-smartconnect.c b/drivers/platform/x86/intel-smartconnect.c index f74e93d096bc..52259dcabecb 100644 --- a/drivers/platform/x86/intel-smartconnect.c +++ b/drivers/platform/x86/intel-smartconnect.c @@ -74,17 +74,6 @@ static struct acpi_driver smartconnect_driver = { }, }; -static int smartconnect_init(void) -{ - return acpi_bus_register_driver(&smartconnect_driver); -} - -static void smartconnect_exit(void) -{ - acpi_bus_unregister_driver(&smartconnect_driver); -} - -module_init(smartconnect_init); -module_exit(smartconnect_exit); +module_acpi_driver(smartconnect_driver); MODULE_DEVICE_TABLE(acpi, smartconnect_ids); diff --git 
a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index f59683aa13d5..6b18aba82cfa 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -128,7 +128,6 @@ static int mfld_pb_remove(struct platform_device *pdev) free_irq(irq, input); input_unregister_device(input); - platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index 81c491e74b34..93fab8b70ce1 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -542,7 +542,6 @@ static int mid_thermal_remove(struct platform_device *pdev) } kfree(pinfo); - platform_set_drvdata(pdev, NULL); /* Stop the ADC */ return configure_adc(0); diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 984253da365d..10d12b221601 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -643,23 +643,6 @@ out_hotkey: return result; } -static int __init acpi_pcc_init(void) -{ - int result = 0; - - if (acpi_disabled) - return -ENODEV; - - result = acpi_bus_register_driver(&acpi_pcc_driver); - if (result < 0) { - ACPI_DEBUG_PRINT((ACPI_DB_ERROR, - "Error registering hotkey driver\n")); - return -ENODEV; - } - - return 0; -} - static int acpi_pcc_hotkey_remove(struct acpi_device *device) { struct pcc_acpi *pcc = acpi_driver_data(device); @@ -679,10 +662,4 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device) return 0; } -static void __exit acpi_pcc_exit(void) -{ - acpi_bus_unregister_driver(&acpi_pcc_driver); -} - -module_init(acpi_pcc_init); -module_exit(acpi_pcc_exit); +module_acpi_driver(acpi_pcc_driver); diff --git a/drivers/platform/x86/samsung-q10.c b/drivers/platform/x86/samsung-q10.c index 4430b8c1369d..cae7098e9b0d 100644 --- a/drivers/platform/x86/samsung-q10.c +++ b/drivers/platform/x86/samsung-q10.c @@ -14,16 
+14,12 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/backlight.h> -#include <linux/i8042.h> #include <linux/dmi.h> +#include <acpi/acpi_drivers.h> -#define SAMSUNGQ10_BL_MAX_INTENSITY 255 -#define SAMSUNGQ10_BL_DEFAULT_INTENSITY 185 +#define SAMSUNGQ10_BL_MAX_INTENSITY 7 -#define SAMSUNGQ10_BL_8042_CMD 0xbe -#define SAMSUNGQ10_BL_8042_DATA { 0x89, 0x91 } - -static int samsungq10_bl_brightness; +static acpi_handle ec_handle; static bool force; module_param(force, bool, 0); @@ -33,21 +29,26 @@ MODULE_PARM_DESC(force, static int samsungq10_bl_set_intensity(struct backlight_device *bd) { - int brightness = bd->props.brightness; - unsigned char c[3] = SAMSUNGQ10_BL_8042_DATA; + acpi_status status; + int i; - c[2] = (unsigned char)brightness; - i8042_lock_chip(); - i8042_command(c, (0x30 << 8) | SAMSUNGQ10_BL_8042_CMD); - i8042_unlock_chip(); - samsungq10_bl_brightness = brightness; + for (i = 0; i < SAMSUNGQ10_BL_MAX_INTENSITY; i++) { + status = acpi_evaluate_object(ec_handle, "_Q63", NULL, NULL); + if (ACPI_FAILURE(status)) + return -EIO; + } + for (i = 0; i < bd->props.brightness; i++) { + status = acpi_evaluate_object(ec_handle, "_Q64", NULL, NULL); + if (ACPI_FAILURE(status)) + return -EIO; + } return 0; } static int samsungq10_bl_get_intensity(struct backlight_device *bd) { - return samsungq10_bl_brightness; + return bd->props.brightness; } static const struct backlight_ops samsungq10_bl_ops = { @@ -55,28 +56,6 @@ static const struct backlight_ops samsungq10_bl_ops = { .update_status = samsungq10_bl_set_intensity, }; -#ifdef CONFIG_PM_SLEEP -static int samsungq10_suspend(struct device *dev) -{ - return 0; -} - -static int samsungq10_resume(struct device *dev) -{ - - struct backlight_device *bd = dev_get_drvdata(dev); - - samsungq10_bl_set_intensity(bd); - return 0; -} -#else -#define samsungq10_suspend NULL -#define samsungq10_resume NULL -#endif - -static SIMPLE_DEV_PM_OPS(samsungq10_pm_ops, - samsungq10_suspend, samsungq10_resume); 
- static int samsungq10_probe(struct platform_device *pdev) { @@ -93,9 +72,6 @@ static int samsungq10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, bd); - bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY; - samsungq10_bl_set_intensity(bd); - return 0; } @@ -104,9 +80,6 @@ static int samsungq10_remove(struct platform_device *pdev) struct backlight_device *bd = platform_get_drvdata(pdev); - bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY; - samsungq10_bl_set_intensity(bd); - backlight_device_unregister(bd); return 0; @@ -116,7 +89,6 @@ static struct platform_driver samsungq10_driver = { .driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, - .pm = &samsungq10_pm_ops, }, .probe = samsungq10_probe, .remove = samsungq10_remove, @@ -172,6 +144,11 @@ static int __init samsungq10_init(void) if (!force && !dmi_check_system(samsungq10_dmi_table)) return -ENODEV; + ec_handle = ec_get_handle(); + + if (!ec_handle) + return -ENODEV; + samsungq10_device = platform_create_bundle(&samsungq10_driver, samsungq10_probe, NULL, 0, NULL, 0); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index be67e5e28d18..03ca6c139f1a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -369,7 +369,7 @@ struct tpacpi_led_classdev { struct led_classdev led_classdev; struct work_struct work; enum led_status_t new_state; - unsigned int led; + int led; }; /* brightness level capabilities */ @@ -5296,6 +5296,16 @@ static int __init led_init(struct ibm_init_struct *iibm) led_supported = led_init_detect_mode(); + if (led_supported != TPACPI_LED_NONE) { + useful_leds = tpacpi_check_quirks(led_useful_qtable, + ARRAY_SIZE(led_useful_qtable)); + + if (!useful_leds) { + led_handle = NULL; + led_supported = TPACPI_LED_NONE; + } + } + vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n", str_supported(led_supported), led_supported); @@ -5309,10 +5319,9 @@ static int __init led_init(struct 
ibm_init_struct *iibm) return -ENOMEM; } - useful_leds = tpacpi_check_quirks(led_useful_qtable, - ARRAY_SIZE(led_useful_qtable)); - for (i = 0; i < TPACPI_LED_NUMLEDS; i++) { + tpacpi_leds[i].led = -1; + if (!tpacpi_is_led_restricted(i) && test_bit(i, &useful_leds)) { rc = tpacpi_init_led(i); @@ -5370,9 +5379,13 @@ static int led_write(char *buf) return -ENODEV; while ((cmd = next_cmd(&buf))) { - if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 15) + if (sscanf(cmd, "%d", &led) != 1) return -EINVAL; + if (led < 0 || led > (TPACPI_LED_NUMLEDS - 1) || + tpacpi_leds[led].led < 0) + return -ENODEV; + if (strstr(cmd, "off")) { s = TPACPI_LED_OFF; } else if (strstr(cmd, "on")) { diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 6e02c953d888..601ea9512242 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -780,7 +780,7 @@ static bool guid_already_parsed(const char *guid_string) /* * Parse the _WDG method for the GUID data blocks */ -static acpi_status parse_wdg(acpi_handle handle) +static int parse_wdg(acpi_handle handle) { struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *obj; @@ -812,7 +812,7 @@ static acpi_status parse_wdg(acpi_handle handle) wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL); if (!wblock) - return AE_NO_MEMORY; + return -ENOMEM; wblock->handle = handle; wblock->gblock = gblock[i]; diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index bb49ab684f9a..e6f92b450913 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -269,7 +269,6 @@ config CHARGER_ISP1704 config CHARGER_MAX8903 tristate "MAX8903 Battery DC-DC Charger for USB and Adapter Power" - depends on GENERIC_HARDIRQS help Say Y to enable support for the MAX8903 DC-DC charger and sysfs. 
The driver supports controlling charger-enable and current-limit @@ -370,7 +369,7 @@ config AB8500_BM config BATTERY_GOLDFISH tristate "Goldfish battery driver" - depends on GENERIC_HARDIRQS && (GOLDFISH || COMPILE_TEST) + depends on GOLDFISH || COMPILE_TEST help Say Y to enable support for the battery and AC power in the Goldfish emulator. diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig index 6efd9b60d8ff..0c9f2805d076 100644 --- a/drivers/pps/clients/Kconfig +++ b/drivers/pps/clients/Kconfig @@ -31,7 +31,7 @@ config PPS_CLIENT_PARPORT config PPS_CLIENT_GPIO tristate "PPS client using GPIO" - depends on PPS && GENERIC_HARDIRQS + depends on PPS help If you say yes here you get support for a PPS source using GPIO. To be useful you must also register a platform device diff --git a/drivers/scsi/aic7xxx/aic7xxx_pci.c b/drivers/scsi/aic7xxx/aic7xxx_pci.c index 6917b4f5ac9e..22d5a949ec83 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_pci.c @@ -692,7 +692,7 @@ ahc_find_pci_device(ahc_dev_softc_t pci) * ID as valid. 
*/ if (ahc_get_pci_function(pci) > 0 - && ahc_9005_subdevinfo_valid(vendor, device, subvendor, subdevice) + && ahc_9005_subdevinfo_valid(device, vendor, subdevice, subvendor) && SUBID_9005_MFUNCENB(subdevice) == 0) return (NULL); diff --git a/drivers/scsi/esas2r/esas2r_flash.c b/drivers/scsi/esas2r/esas2r_flash.c index 8582929b1fef..2ec3c23275b8 100644 --- a/drivers/scsi/esas2r/esas2r_flash.c +++ b/drivers/scsi/esas2r/esas2r_flash.c @@ -860,8 +860,13 @@ bool esas2r_process_fs_ioctl(struct esas2r_adapter *a, return false; } + if (fsc->command >= cmdcnt) { + fs->status = ATTO_STS_INV_FUNC; + return false; + } + func = cmd_to_fls_func[fsc->command]; - if (fsc->command >= cmdcnt || func == 0xFF) { + if (func == 0xFF) { fs->status = ATTO_STS_INV_FUNC; return false; } @@ -1355,7 +1360,7 @@ void esas2r_nvram_set_defaults(struct esas2r_adapter *a) u32 time = jiffies_to_msecs(jiffies); esas2r_lock_clear_flags(&a->flags, AF_NVR_VALID); - memcpy(n, &default_sas_nvram, sizeof(struct esas2r_sas_nvram)); + *n = default_sas_nvram; n->sas_addr[3] |= 0x0F; n->sas_addr[4] = HIBYTE(LOWORD(time)); n->sas_addr[5] = LOBYTE(LOWORD(time)); @@ -1373,7 +1378,7 @@ void esas2r_nvram_get_defaults(struct esas2r_adapter *a, * address out first. 
*/ memcpy(&sas_addr[0], a->nvram->sas_addr, 8); - memcpy(nvram, &default_sas_nvram, sizeof(struct esas2r_sas_nvram)); + *nvram = default_sas_nvram; memcpy(&nvram->sas_addr[0], &sas_addr[0], 8); } diff --git a/drivers/scsi/esas2r/esas2r_init.c b/drivers/scsi/esas2r/esas2r_init.c index 3a798e7d5c56..da1869df2408 100644 --- a/drivers/scsi/esas2r/esas2r_init.c +++ b/drivers/scsi/esas2r/esas2r_init.c @@ -665,7 +665,7 @@ void esas2r_kill_adapter(int i) int esas2r_cleanup(struct Scsi_Host *host) { - struct esas2r_adapter *a = (struct esas2r_adapter *)host->hostdata; + struct esas2r_adapter *a; int index; if (host == NULL) { @@ -678,6 +678,7 @@ int esas2r_cleanup(struct Scsi_Host *host) } esas2r_debug("esas2r_cleanup called for host %p", host); + a = (struct esas2r_adapter *)host->hostdata; index = a->index; esas2r_kill_adapter(index); return index; @@ -808,7 +809,7 @@ static void esas2r_init_pci_cfg_space(struct esas2r_adapter *a) int pcie_cap_reg; pcie_cap_reg = pci_find_capability(a->pcid, PCI_CAP_ID_EXP); - if (0xffff && pcie_cap_reg) { + if (0xffff & pcie_cap_reg) { u16 devcontrol; pci_read_config_word(a->pcid, pcie_cap_reg + PCI_EXP_DEVCTL, @@ -1550,8 +1551,7 @@ void esas2r_reset_chip(struct esas2r_adapter *a) * to not overwrite a previous crash that was saved. 
*/ if ((a->flags2 & AF2_COREDUMP_AVAIL) - && !(a->flags2 & AF2_COREDUMP_SAVED) - && a->fw_coredump_buff) { + && !(a->flags2 & AF2_COREDUMP_SAVED)) { esas2r_read_mem_block(a, a->fw_coredump_buff, MW_DATA_ADDR_SRAM + 0x80000, diff --git a/drivers/scsi/esas2r/esas2r_ioctl.c b/drivers/scsi/esas2r/esas2r_ioctl.c index f3d0cb885972..e5b09027e066 100644 --- a/drivers/scsi/esas2r/esas2r_ioctl.c +++ b/drivers/scsi/esas2r/esas2r_ioctl.c @@ -415,7 +415,7 @@ static int csmi_ioctl_callback(struct esas2r_adapter *a, lun = tm->lun; } - if (path > 0 || tid > ESAS2R_MAX_ID) { + if (path > 0) { rq->func_rsp.ioctl_rsp.csmi.csmi_status = cpu_to_le32( CSMI_STS_INV_PARAM); return false; diff --git a/drivers/scsi/esas2r/esas2r_vda.c b/drivers/scsi/esas2r/esas2r_vda.c index f8ec6d636846..fd1392879647 100644 --- a/drivers/scsi/esas2r/esas2r_vda.c +++ b/drivers/scsi/esas2r/esas2r_vda.c @@ -302,6 +302,7 @@ static void esas2r_complete_vda_ioctl(struct esas2r_adapter *a, if (vi->cmd.cfg.cfg_func == VDA_CFG_GET_INIT) { struct atto_ioctl_vda_cfg_cmd *cfg = &vi->cmd.cfg; struct atto_vda_cfg_rsp *rsp = &rq->func_rsp.cfg_rsp; + char buf[sizeof(cfg->data.init.fw_release) + 1]; cfg->data_length = cpu_to_le32(sizeof(struct atto_vda_cfg_init)); @@ -309,11 +310,13 @@ static void esas2r_complete_vda_ioctl(struct esas2r_adapter *a, le32_to_cpu(rsp->vda_version); cfg->data.init.fw_build = rsp->fw_build; - sprintf((char *)&cfg->data.init.fw_release, - "%1d.%02d", + snprintf(buf, sizeof(buf), "%1d.%02d", (int)LOBYTE(le16_to_cpu(rsp->fw_release)), (int)HIBYTE(le16_to_cpu(rsp->fw_release))); + memcpy(&cfg->data.init.fw_release, buf, + sizeof(cfg->data.init.fw_release)); + if (LOWORD(LOBYTE(cfg->data.init.fw_build)) == 'A') cfg->data.init.fw_version = cfg->data.init.fw_build; diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index c18c68150e9f..e4dd3d7cd236 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -43,6 +43,8 @@ #define DFX DRV_NAME "%d: " #define 
DESC_CLEAN_LOW_WATERMARK 8 +#define FNIC_UCSM_DFLT_THROTTLE_CNT_BLD 16 /* UCSM default throttle count */ +#define FNIC_MIN_IO_REQ 256 /* Min IO throttle count */ #define FNIC_MAX_IO_REQ 2048 /* scsi_cmnd tag map entries */ #define FNIC_IO_LOCKS 64 /* IO locks: power of 2 */ #define FNIC_DFLT_QUEUE_DEPTH 32 @@ -154,6 +156,9 @@ do { \ FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING, \ shost_printk(kern_level, host, fmt, ##args);) +#define FNIC_MAIN_NOTE(kern_level, host, fmt, args...) \ + shost_printk(kern_level, host, fmt, ##args) + extern const char *fnic_state_str[]; enum fnic_intx_intr_index { @@ -215,10 +220,12 @@ struct fnic { struct vnic_stats *stats; unsigned long stats_time; /* time of stats update */ + unsigned long stats_reset_time; /* time of stats reset */ struct vnic_nic_cfg *nic_cfg; char name[IFNAMSIZ]; struct timer_list notify_timer; /* used for MSI interrupts */ + unsigned int fnic_max_tag_id; unsigned int err_intr_offset; unsigned int link_intr_offset; @@ -359,4 +366,5 @@ fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags) return ((fnic->state_flags & st_flags) == st_flags); } void __fnic_set_state_flags(struct fnic *, unsigned long, unsigned long); +void fnic_dump_fchost_stats(struct Scsi_Host *, struct fc_host_statistics *); #endif /* _FNIC_H_ */ diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 42e15ee6e1bb..bbf81ea3a252 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -74,6 +74,10 @@ module_param(fnic_trace_max_pages, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages " "for fnic trace buffer"); +static unsigned int fnic_max_qdepth = FNIC_DFLT_QUEUE_DEPTH; +module_param(fnic_max_qdepth, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(fnic_max_qdepth, "Queue depth to report for each LUN"); + static struct libfc_function_template fnic_transport_template = { .frame_send = fnic_send, .lport_set_port_id = fnic_set_port_id, @@ -91,7 +95,7 @@ 
static int fnic_slave_alloc(struct scsi_device *sdev) if (!rport || fc_remote_port_chkready(rport)) return -ENXIO; - scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH); + scsi_activate_tcq(sdev, fnic_max_qdepth); return 0; } @@ -126,6 +130,7 @@ fnic_set_rport_dev_loss_tmo(struct fc_rport *rport, u32 timeout) static void fnic_get_host_speed(struct Scsi_Host *shost); static struct scsi_transport_template *fnic_fc_transport; static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *); +static void fnic_reset_host_stats(struct Scsi_Host *); static struct fc_function_template fnic_fc_functions = { @@ -153,6 +158,7 @@ static struct fc_function_template fnic_fc_functions = { .set_rport_dev_loss_tmo = fnic_set_rport_dev_loss_tmo, .issue_fc_host_lip = fnic_reset, .get_fc_host_stats = fnic_get_stats, + .reset_fc_host_stats = fnic_reset_host_stats, .dd_fcrport_size = sizeof(struct fc_rport_libfc_priv), .terminate_rport_io = fnic_terminate_rport_io, .bsg_request = fc_lport_bsg_request, @@ -206,13 +212,116 @@ static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host) stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors; stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop; stats->invalid_crc_count = vs->rx.rx_crc_errors; - stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ; + stats->seconds_since_last_reset = + (jiffies - fnic->stats_reset_time) / HZ; stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000); stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000); return stats; } +/* + * fnic_dump_fchost_stats + * note : dumps fc_statistics into system logs + */ +void fnic_dump_fchost_stats(struct Scsi_Host *host, + struct fc_host_statistics *stats) +{ + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: seconds since last reset = %llu\n", + stats->seconds_since_last_reset); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: tx frames = %llu\n", + stats->tx_frames); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: tx words = 
%llu\n", + stats->tx_words); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: rx frames = %llu\n", + stats->rx_frames); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: rx words = %llu\n", + stats->rx_words); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: lip count = %llu\n", + stats->lip_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: nos count = %llu\n", + stats->nos_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: error frames = %llu\n", + stats->error_frames); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: dumped frames = %llu\n", + stats->dumped_frames); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: link failure count = %llu\n", + stats->link_failure_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: loss of sync count = %llu\n", + stats->loss_of_sync_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: loss of signal count = %llu\n", + stats->loss_of_signal_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: prim seq protocol err count = %llu\n", + stats->prim_seq_protocol_err_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: invalid tx word count= %llu\n", + stats->invalid_tx_word_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: invalid crc count = %llu\n", + stats->invalid_crc_count); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: fcp input requests = %llu\n", + stats->fcp_input_requests); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: fcp output requests = %llu\n", + stats->fcp_output_requests); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: fcp control requests = %llu\n", + stats->fcp_control_requests); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: fcp input megabytes = %llu\n", + stats->fcp_input_megabytes); + FNIC_MAIN_NOTE(KERN_NOTICE, host, + "fnic: fcp output megabytes = %llu\n", + stats->fcp_output_megabytes); + return; +} + +/* + * fnic_reset_host_stats : clears host stats + * note : called when reset_statistics set under sysfs dir + */ +static void fnic_reset_host_stats(struct Scsi_Host *host) +{ + int ret; + struct fc_lport *lp = shost_priv(host); 
+ struct fnic *fnic = lport_priv(lp); + struct fc_host_statistics *stats; + unsigned long flags; + + /* dump current stats, before clearing them */ + stats = fnic_get_stats(host); + fnic_dump_fchost_stats(host, stats); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + ret = vnic_dev_stats_clear(fnic->vdev); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (ret) { + FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host, + "fnic: Reset vnic stats failed" + " 0x%x", ret); + return; + } + fnic->stats_reset_time = jiffies; + memset(stats, 0, sizeof(*stats)); + + return; +} + void fnic_log_q_error(struct fnic *fnic) { unsigned int i; @@ -447,13 +556,6 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) host->transportt = fnic_fc_transport; - err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ); - if (err) { - shost_printk(KERN_ERR, fnic->lport->host, - "Unable to alloc shared tag map\n"); - goto err_out_free_hba; - } - /* Setup PCI resources */ pci_set_drvdata(pdev, fnic); @@ -476,10 +578,10 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); /* Query PCI controller on system for DMA addressing - * limitation for the device. Try 40-bit first, and + * limitation for the device. Try 64-bit first, and * fail to 32-bit. 
*/ - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { @@ -496,10 +598,10 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, - "Unable to obtain 40-bit DMA " + "Unable to obtain 64-bit DMA " "for consistent allocations, aborting.\n"); goto err_out_release_regions; } @@ -566,6 +668,22 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "aborting.\n"); goto err_out_dev_close; } + + /* Configure Maximum Outstanding IO reqs*/ + if (fnic->config.io_throttle_count != FNIC_UCSM_DFLT_THROTTLE_CNT_BLD) { + host->can_queue = min_t(u32, FNIC_MAX_IO_REQ, + max_t(u32, FNIC_MIN_IO_REQ, + fnic->config.io_throttle_count)); + } + fnic->fnic_max_tag_id = host->can_queue; + + err = scsi_init_shared_tag_map(host, fnic->fnic_max_tag_id); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to alloc shared tag map\n"); + goto err_out_dev_close; + } + host->max_lun = fnic->config.luns_per_tgt; host->max_id = FNIC_MAX_FCP_TARGET; host->max_cmd_len = FCOE_MAX_CMD_LEN; @@ -719,6 +837,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } fc_lport_init_stats(lp); + fnic->stats_reset_time = jiffies; fc_lport_config(lp); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index a97e6e584f8c..d014aae19134 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -111,6 +111,12 @@ static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic, return &fnic->io_req_lock[hash]; } +static inline spinlock_t *fnic_io_lock_tag(struct fnic *fnic, + int tag) +{ + return &fnic->io_req_lock[tag & (FNIC_IO_LOCKS - 1)]; +} + /* * Unmap 
the data buffer and sense buffer for an io_req, * also unmap and free the device-private scatter/gather list. @@ -730,7 +736,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, fcpio_tag_id_dec(&tag, &id); icmnd_cmpl = &desc->u.icmnd_cmpl; - if (id >= FNIC_MAX_IO_REQ) { + if (id >= fnic->fnic_max_tag_id) { shost_printk(KERN_ERR, fnic->lport->host, "Tag out of range tag %x hdr status = %s\n", id, fnic_fcpio_status_to_str(hdr_status)); @@ -818,38 +824,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER) xfer_len -= icmnd_cmpl->residual; - /* - * If queue_full, then try to reduce queue depth for all - * LUNS on the target. Todo: this should be accompanied - * by a periodic queue_depth rampup based on successful - * IO completion. - */ - if (icmnd_cmpl->scsi_status == QUEUE_FULL) { - struct scsi_device *t_sdev; - int qd = 0; - - shost_for_each_device(t_sdev, sc->device->host) { - if (t_sdev->id != sc->device->id) - continue; - - if (t_sdev->queue_depth > 1) { - qd = scsi_track_queue_full - (t_sdev, - t_sdev->queue_depth - 1); - if (qd == -1) - qd = t_sdev->host->cmd_per_lun; - shost_printk(KERN_INFO, - fnic->lport->host, - "scsi[%d:%d:%d:%d" - "] queue full detected," - "new depth = %d\n", - t_sdev->host->host_no, - t_sdev->channel, - t_sdev->id, t_sdev->lun, - t_sdev->queue_depth); - } - } - } break; case FCPIO_TIMEOUT: /* request was timed out */ @@ -939,7 +913,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); fcpio_tag_id_dec(&tag, &id); - if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) { + if ((id & FNIC_TAG_MASK) >= fnic->fnic_max_tag_id) { shost_printk(KERN_ERR, fnic->lport->host, "Tag out of range tag %x hdr status = %s\n", id, fnic_fcpio_status_to_str(hdr_status)); @@ -988,9 +962,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, spin_unlock_irqrestore(io_lock, flags); return; } - CMD_STATE(sc) = 
FNIC_IOREQ_ABTS_COMPLETE; CMD_ABTS_STATUS(sc) = hdr_status; - CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "abts cmpl recd. id %d status %s\n", @@ -1148,23 +1120,25 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do) static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) { - unsigned int i; + int i; struct fnic_io_req *io_req; unsigned long flags = 0; struct scsi_cmnd *sc; spinlock_t *io_lock; unsigned long start_time = 0; - for (i = 0; i < FNIC_MAX_IO_REQ; i++) { + for (i = 0; i < fnic->fnic_max_tag_id; i++) { if (i == exclude_id) continue; + io_lock = fnic_io_lock_tag(fnic, i); + spin_lock_irqsave(io_lock, flags); sc = scsi_host_find_tag(fnic->lport->host, i); - if (!sc) + if (!sc) { + spin_unlock_irqrestore(io_lock, flags); continue; + } - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { @@ -1236,7 +1210,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, fcpio_tag_id_dec(&desc->hdr.tag, &id); id &= FNIC_TAG_MASK; - if (id >= FNIC_MAX_IO_REQ) + if (id >= fnic->fnic_max_tag_id) return; sc = scsi_host_find_tag(fnic->lport->host, id); @@ -1340,14 +1314,15 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) if (fnic->in_remove) return; - for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { abt_tag = tag; + io_lock = fnic_io_lock_tag(fnic, tag); + spin_lock_irqsave(io_lock, flags); sc = scsi_host_find_tag(fnic->lport->host, tag); - if (!sc) + if (!sc) { + spin_unlock_irqrestore(io_lock, flags); continue; - - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); + } io_req = (struct fnic_io_req *)CMD_SP(sc); @@ -1441,12 +1416,29 @@ void fnic_terminate_rport_io(struct fc_rport *rport) unsigned long flags; struct scsi_cmnd *sc; struct scsi_lun fc_lun; - 
struct fc_rport_libfc_priv *rdata = rport->dd_data; - struct fc_lport *lport = rdata->local_port; - struct fnic *fnic = lport_priv(lport); + struct fc_rport_libfc_priv *rdata; + struct fc_lport *lport; + struct fnic *fnic; struct fc_rport *cmd_rport; enum fnic_ioreq_state old_ioreq_state; + if (!rport) { + printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n"); + return; + } + rdata = rport->dd_data; + + if (!rdata) { + printk(KERN_ERR "fnic_terminate_rport_io: rdata is NULL\n"); + return; + } + lport = rdata->local_port; + + if (!lport) { + printk(KERN_ERR "fnic_terminate_rport_io: lport is NULL\n"); + return; + } + fnic = lport_priv(lport); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_terminate_rport_io called" " wwpn 0x%llx, wwnn0x%llx, rport 0x%p, portid 0x%06x\n", @@ -1456,18 +1448,21 @@ void fnic_terminate_rport_io(struct fc_rport *rport) if (fnic->in_remove) return; - for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { abt_tag = tag; + io_lock = fnic_io_lock_tag(fnic, tag); + spin_lock_irqsave(io_lock, flags); sc = scsi_host_find_tag(fnic->lport->host, tag); - if (!sc) + if (!sc) { + spin_unlock_irqrestore(io_lock, flags); continue; + } cmd_rport = starget_to_rport(scsi_target(sc->device)); - if (rport != cmd_rport) + if (rport != cmd_rport) { + spin_unlock_irqrestore(io_lock, flags); continue; - - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); + } io_req = (struct fnic_io_req *)CMD_SP(sc); @@ -1680,13 +1675,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) io_req->abts_done = NULL; /* fw did not complete abort, timed out */ - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) { spin_unlock_irqrestore(io_lock, flags); CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT; ret = FAILED; goto fnic_abort_cmd_end; } + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + /* * firmware completed the abort, check the status, * free the io_req irrespective 
of failure or success @@ -1784,17 +1781,18 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, DECLARE_COMPLETION_ONSTACK(tm_done); enum fnic_ioreq_state old_ioreq_state; - for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { + io_lock = fnic_io_lock_tag(fnic, tag); + spin_lock_irqsave(io_lock, flags); sc = scsi_host_find_tag(fnic->lport->host, tag); /* * ignore this lun reset cmd or cmds that do not belong to * this lun */ - if (!sc || sc == lr_sc || sc->device != lun_dev) + if (!sc || sc == lr_sc || sc->device != lun_dev) { + spin_unlock_irqrestore(io_lock, flags); continue; - - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); + } io_req = (struct fnic_io_req *)CMD_SP(sc); @@ -1823,6 +1821,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, spin_unlock_irqrestore(io_lock, flags); continue; } + + if (io_req->abts_done) + shost_printk(KERN_ERR, fnic->lport->host, + "%s: io_req->abts_done is set state is %s\n", + __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); old_ioreq_state = CMD_STATE(sc); /* * Any pending IO issued prior to reset is expected to be @@ -1833,11 +1836,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, */ CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; - if (io_req->abts_done) - shost_printk(KERN_ERR, fnic->lport->host, - "%s: io_req->abts_done is set state is %s\n", - __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); - BUG_ON(io_req->abts_done); abt_tag = tag; @@ -1890,12 +1888,13 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, io_req->abts_done = NULL; /* if abort is still pending with fw, fail */ - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) { spin_unlock_irqrestore(io_lock, flags); CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; ret = 1; goto clean_pending_aborts_end; } + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; CMD_SP(sc) = NULL; spin_unlock_irqrestore(io_lock, flags); @@ -2093,8 +2092,8 @@ 
int fnic_device_reset(struct scsi_cmnd *sc) spin_unlock_irqrestore(io_lock, flags); int_to_scsilun(sc->device->lun, &fc_lun); /* - * Issue abort and terminate on the device reset request. - * If q'ing of the abort fails, retry issue it after a delay. + * Issue abort and terminate on device reset request. + * If q'ing of terminate fails, retry it after a delay. */ while (1) { spin_lock_irqsave(io_lock, flags); @@ -2405,7 +2404,7 @@ int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc) lun_dev = lr_sc->device; /* walk again to check, if IOs are still pending in fw */ - for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { sc = scsi_host_find_tag(fnic->lport->host, tag); /* * ignore this lun reset cmd or cmds that do not belong to diff --git a/drivers/scsi/fnic/vnic_scsi.h b/drivers/scsi/fnic/vnic_scsi.h index fbb55364e272..e343e1d0f801 100644 --- a/drivers/scsi/fnic/vnic_scsi.h +++ b/drivers/scsi/fnic/vnic_scsi.h @@ -54,8 +54,8 @@ #define VNIC_FNIC_PLOGI_TIMEOUT_MIN 1000 #define VNIC_FNIC_PLOGI_TIMEOUT_MAX 255000 -#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN 256 -#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX 4096 +#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN 1 +#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX 2048 #define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN 0 #define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX 240000 diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index fac8cf5832dd..891c86b66253 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -54,7 +54,7 @@ #include "hpsa.h" /* HPSA_DRIVER_VERSION must be 3 byte values (0-255) separated by '.' 
*/ -#define HPSA_DRIVER_VERSION "2.0.2-1" +#define HPSA_DRIVER_VERSION "3.4.0-1" #define DRIVER_NAME "HP HPSA Driver (v " HPSA_DRIVER_VERSION ")" #define HPSA "hpsa" @@ -89,13 +89,14 @@ static const struct pci_device_id hpsa_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3245}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3247}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324a}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324b}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3233}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3350}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3351}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3352}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3353}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x334D}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3354}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3355}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3356}, @@ -107,7 +108,19 @@ static const struct pci_device_id hpsa_pci_device_id[] = { {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSH, 0x103C, 0x1925}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSH, 0x103C, 0x1926}, {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSH, 0x103C, 0x1928}, - {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x334d}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSH, 0x103C, 0x1929}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21BD}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21BE}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21BF}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C0}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C1}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C2}, + {PCI_VENDOR_ID_HP, 
PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C3}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C4}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C5}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C7}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C8}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSI, 0x103C, 0x21C9}, {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {0,} @@ -125,24 +138,35 @@ static struct board_type products[] = { {0x3245103C, "Smart Array P410i", &SA5_access}, {0x3247103C, "Smart Array P411", &SA5_access}, {0x3249103C, "Smart Array P812", &SA5_access}, - {0x324a103C, "Smart Array P712m", &SA5_access}, - {0x324b103C, "Smart Array P711m", &SA5_access}, + {0x324A103C, "Smart Array P712m", &SA5_access}, + {0x324B103C, "Smart Array P711m", &SA5_access}, {0x3350103C, "Smart Array P222", &SA5_access}, {0x3351103C, "Smart Array P420", &SA5_access}, {0x3352103C, "Smart Array P421", &SA5_access}, {0x3353103C, "Smart Array P822", &SA5_access}, + {0x334D103C, "Smart Array P822se", &SA5_access}, {0x3354103C, "Smart Array P420i", &SA5_access}, {0x3355103C, "Smart Array P220i", &SA5_access}, {0x3356103C, "Smart Array P721m", &SA5_access}, - {0x1920103C, "Smart Array", &SA5_access}, - {0x1921103C, "Smart Array", &SA5_access}, - {0x1922103C, "Smart Array", &SA5_access}, - {0x1923103C, "Smart Array", &SA5_access}, - {0x1924103C, "Smart Array", &SA5_access}, - {0x1925103C, "Smart Array", &SA5_access}, - {0x1926103C, "Smart Array", &SA5_access}, - {0x1928103C, "Smart Array", &SA5_access}, - {0x334d103C, "Smart Array P822se", &SA5_access}, + {0x1921103C, "Smart Array P830i", &SA5_access}, + {0x1922103C, "Smart Array P430", &SA5_access}, + {0x1923103C, "Smart Array P431", &SA5_access}, + {0x1924103C, "Smart Array P830", &SA5_access}, + {0x1926103C, "Smart Array P731m", &SA5_access}, + {0x1928103C, "Smart Array P230i", &SA5_access}, + {0x1929103C, "Smart Array P530", &SA5_access}, + 
{0x21BD103C, "Smart Array", &SA5_access}, + {0x21BE103C, "Smart Array", &SA5_access}, + {0x21BF103C, "Smart Array", &SA5_access}, + {0x21C0103C, "Smart Array", &SA5_access}, + {0x21C1103C, "Smart Array", &SA5_access}, + {0x21C2103C, "Smart Array", &SA5_access}, + {0x21C3103C, "Smart Array", &SA5_access}, + {0x21C4103C, "Smart Array", &SA5_access}, + {0x21C5103C, "Smart Array", &SA5_access}, + {0x21C7103C, "Smart Array", &SA5_access}, + {0x21C8103C, "Smart Array", &SA5_access}, + {0x21C9103C, "Smart Array", &SA5_access}, {0xFFFF103C, "Unknown Smart Array", &SA5_access}, }; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 4e31caa21ddf..23f5ba5e6472 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2208,7 +2208,10 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type) if (rsp_rc != 0) { sdev_printk(KERN_ERR, sdev, "Failed to send cancel event. rc=%d\n", rsp_rc); - return -EIO; + /* If failure is received, the host adapter is most likely going + through reset, return success so the caller will wait for the command + being cancelled to get returned */ + return 0; } sdev_printk(KERN_INFO, sdev, "Cancelling outstanding commands.\n"); @@ -2221,7 +2224,15 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type) if (status != IBMVFC_MAD_SUCCESS) { sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status); - return -EIO; + switch (status) { + case IBMVFC_MAD_DRIVER_FAILED: + case IBMVFC_MAD_CRQ_ERROR: + /* Host adapter most likely going through reset, return success to + the caller will wait for the command being cancelled to get returned */ + return 0; + default: + return -EIO; + }; } sdev_printk(KERN_INFO, sdev, "Successfully cancelled outstanding commands\n"); diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index d0fa4b6c551f..fa764406df68 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -241,7 
+241,7 @@ static void gather_partition_info(void) struct device_node *rootdn; const char *ppartition_name; - const unsigned int *p_number_ptr; + const __be32 *p_number_ptr; /* Retrieve information about this partition */ rootdn = of_find_node_by_path("/"); @@ -255,7 +255,7 @@ static void gather_partition_info(void) sizeof(partition_name)); p_number_ptr = of_get_property(rootdn, "ibm,partition-no", NULL); if (p_number_ptr) - partition_number = *p_number_ptr; + partition_number = of_read_number(p_number_ptr, 1); of_node_put(rootdn); } @@ -270,10 +270,11 @@ static void set_adapter_info(struct ibmvscsi_host_data *hostdata) strncpy(hostdata->madapter_info.partition_name, partition_name, sizeof(hostdata->madapter_info.partition_name)); - hostdata->madapter_info.partition_number = partition_number; + hostdata->madapter_info.partition_number = + cpu_to_be32(partition_number); - hostdata->madapter_info.mad_version = 1; - hostdata->madapter_info.os_type = 2; + hostdata->madapter_info.mad_version = cpu_to_be32(1); + hostdata->madapter_info.os_type = cpu_to_be32(2); } /** @@ -464,9 +465,9 @@ static int initialize_event_pool(struct event_pool *pool, memset(&evt->crq, 0x00, sizeof(evt->crq)); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; - evt->crq.IU_length = sizeof(*evt->xfer_iu); - evt->crq.IU_data_ptr = pool->iu_token + - sizeof(*evt->xfer_iu) * i; + evt->crq.IU_length = cpu_to_be16(sizeof(*evt->xfer_iu)); + evt->crq.IU_data_ptr = cpu_to_be64(pool->iu_token + + sizeof(*evt->xfer_iu) * i); evt->xfer_iu = pool->iu_storage + i; evt->hostdata = hostdata; evt->ext_list = NULL; @@ -588,7 +589,7 @@ static void init_event_struct(struct srp_event_struct *evt_struct, evt_struct->cmnd_done = NULL; evt_struct->sync_srp = NULL; evt_struct->crq.format = format; - evt_struct->crq.timeout = timeout; + evt_struct->crq.timeout = cpu_to_be16(timeout); evt_struct->done = done; } @@ -659,8 +660,8 @@ static int map_sg_list(struct scsi_cmnd *cmd, int nseg, scsi_for_each_sg(cmd, sg, nseg, i) { 
struct srp_direct_buf *descr = md + i; - descr->va = sg_dma_address(sg); - descr->len = sg_dma_len(sg); + descr->va = cpu_to_be64(sg_dma_address(sg)); + descr->len = cpu_to_be32(sg_dma_len(sg)); descr->key = 0; total_length += sg_dma_len(sg); } @@ -703,13 +704,14 @@ static int map_sg_data(struct scsi_cmnd *cmd, } indirect->table_desc.va = 0; - indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf); + indirect->table_desc.len = cpu_to_be32(sg_mapped * + sizeof(struct srp_direct_buf)); indirect->table_desc.key = 0; if (sg_mapped <= MAX_INDIRECT_BUFS) { total_length = map_sg_list(cmd, sg_mapped, &indirect->desc_list[0]); - indirect->len = total_length; + indirect->len = cpu_to_be32(total_length); return 1; } @@ -731,9 +733,10 @@ static int map_sg_data(struct scsi_cmnd *cmd, total_length = map_sg_list(cmd, sg_mapped, evt_struct->ext_list); - indirect->len = total_length; - indirect->table_desc.va = evt_struct->ext_list_token; - indirect->table_desc.len = sg_mapped * sizeof(indirect->desc_list[0]); + indirect->len = cpu_to_be32(total_length); + indirect->table_desc.va = cpu_to_be64(evt_struct->ext_list_token); + indirect->table_desc.len = cpu_to_be32(sg_mapped * + sizeof(indirect->desc_list[0])); memcpy(indirect->desc_list, evt_struct->ext_list, MAX_INDIRECT_BUFS * sizeof(struct srp_direct_buf)); return 1; @@ -849,7 +852,7 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, struct ibmvscsi_host_data *hostdata, unsigned long timeout) { - u64 *crq_as_u64 = (u64 *) &evt_struct->crq; + __be64 *crq_as_u64 = (__be64 *)&evt_struct->crq; int request_status = 0; int rc; int srp_req = 0; @@ -920,8 +923,9 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, add_timer(&evt_struct->timer); } - if ((rc = - ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) { + rc = ibmvscsi_send_crq(hostdata, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + if (rc != 0) { list_del(&evt_struct->list); 
del_timer(&evt_struct->timer); @@ -987,15 +991,16 @@ static void handle_cmd_rsp(struct srp_event_struct *evt_struct) if (((cmnd->result >> 1) & 0x1f) == CHECK_CONDITION) memcpy(cmnd->sense_buffer, rsp->data, - rsp->sense_data_len); + be32_to_cpu(rsp->sense_data_len)); unmap_cmd_data(&evt_struct->iu.srp.cmd, evt_struct, evt_struct->hostdata->dev); if (rsp->flags & SRP_RSP_FLAG_DOOVER) - scsi_set_resid(cmnd, rsp->data_out_res_cnt); + scsi_set_resid(cmnd, + be32_to_cpu(rsp->data_out_res_cnt)); else if (rsp->flags & SRP_RSP_FLAG_DIOVER) - scsi_set_resid(cmnd, rsp->data_in_res_cnt); + scsi_set_resid(cmnd, be32_to_cpu(rsp->data_in_res_cnt)); } if (evt_struct->cmnd_done) @@ -1037,7 +1042,7 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd, memset(srp_cmd, 0x00, SRP_MAX_IU_LEN); srp_cmd->opcode = SRP_CMD; memcpy(srp_cmd->cdb, cmnd->cmnd, sizeof(srp_cmd->cdb)); - srp_cmd->lun = ((u64) lun) << 48; + srp_cmd->lun = cpu_to_be64(((u64)lun) << 48); if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { if (!firmware_has_feature(FW_FEATURE_CMO)) @@ -1062,9 +1067,10 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd, if ((in_fmt == SRP_DATA_DESC_INDIRECT || out_fmt == SRP_DATA_DESC_INDIRECT) && indirect->table_desc.va == 0) { - indirect->table_desc.va = evt_struct->crq.IU_data_ptr + + indirect->table_desc.va = + cpu_to_be64(be64_to_cpu(evt_struct->crq.IU_data_ptr) + offsetof(struct srp_cmd, add_data) + - offsetof(struct srp_indirect_buf, desc_list); + offsetof(struct srp_indirect_buf, desc_list)); } return ibmvscsi_send_srp_event(evt_struct, hostdata, 0); @@ -1158,7 +1164,7 @@ static void login_rsp(struct srp_event_struct *evt_struct) * request_limit could have been set to -1 by this client. 
*/ atomic_set(&hostdata->request_limit, - evt_struct->xfer_iu->srp.login_rsp.req_lim_delta); + be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta)); /* If we had any pending I/Os, kick them */ scsi_unblock_requests(hostdata->host); @@ -1184,8 +1190,9 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata) login = &evt_struct->iu.srp.login_req; memset(login, 0, sizeof(*login)); login->opcode = SRP_LOGIN_REQ; - login->req_it_iu_len = sizeof(union srp_iu); - login->req_buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT; + login->req_it_iu_len = cpu_to_be32(sizeof(union srp_iu)); + login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | + SRP_BUF_FORMAT_INDIRECT); spin_lock_irqsave(hostdata->host->host_lock, flags); /* Start out with a request limit of 0, since this is negotiated in @@ -1214,12 +1221,13 @@ static void capabilities_rsp(struct srp_event_struct *evt_struct) dev_err(hostdata->dev, "error 0x%X getting capabilities info\n", evt_struct->xfer_iu->mad.capabilities.common.status); } else { - if (hostdata->caps.migration.common.server_support != SERVER_SUPPORTS_CAP) + if (hostdata->caps.migration.common.server_support != + cpu_to_be16(SERVER_SUPPORTS_CAP)) dev_info(hostdata->dev, "Partition migration not supported\n"); if (client_reserve) { if (hostdata->caps.reserve.common.server_support == - SERVER_SUPPORTS_CAP) + cpu_to_be16(SERVER_SUPPORTS_CAP)) dev_info(hostdata->dev, "Client reserve enabled\n"); else dev_info(hostdata->dev, "Client reserve not supported\n"); @@ -1251,9 +1259,9 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata) req = &evt_struct->iu.mad.capabilities; memset(req, 0, sizeof(*req)); - hostdata->caps.flags = CAP_LIST_SUPPORTED; + hostdata->caps.flags = cpu_to_be32(CAP_LIST_SUPPORTED); if (hostdata->client_migrated) - hostdata->caps.flags |= CLIENT_MIGRATED; + hostdata->caps.flags |= cpu_to_be32(CLIENT_MIGRATED); strncpy(hostdata->caps.name, dev_name(&hostdata->host->shost_gendev), 
sizeof(hostdata->caps.name)); @@ -1264,22 +1272,31 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata) strncpy(hostdata->caps.loc, location, sizeof(hostdata->caps.loc)); hostdata->caps.loc[sizeof(hostdata->caps.loc) - 1] = '\0'; - req->common.type = VIOSRP_CAPABILITIES_TYPE; - req->buffer = hostdata->caps_addr; + req->common.type = cpu_to_be32(VIOSRP_CAPABILITIES_TYPE); + req->buffer = cpu_to_be64(hostdata->caps_addr); - hostdata->caps.migration.common.cap_type = MIGRATION_CAPABILITIES; - hostdata->caps.migration.common.length = sizeof(hostdata->caps.migration); - hostdata->caps.migration.common.server_support = SERVER_SUPPORTS_CAP; - hostdata->caps.migration.ecl = 1; + hostdata->caps.migration.common.cap_type = + cpu_to_be32(MIGRATION_CAPABILITIES); + hostdata->caps.migration.common.length = + cpu_to_be16(sizeof(hostdata->caps.migration)); + hostdata->caps.migration.common.server_support = + cpu_to_be16(SERVER_SUPPORTS_CAP); + hostdata->caps.migration.ecl = cpu_to_be32(1); if (client_reserve) { - hostdata->caps.reserve.common.cap_type = RESERVATION_CAPABILITIES; - hostdata->caps.reserve.common.length = sizeof(hostdata->caps.reserve); - hostdata->caps.reserve.common.server_support = SERVER_SUPPORTS_CAP; - hostdata->caps.reserve.type = CLIENT_RESERVE_SCSI_2; - req->common.length = sizeof(hostdata->caps); + hostdata->caps.reserve.common.cap_type = + cpu_to_be32(RESERVATION_CAPABILITIES); + hostdata->caps.reserve.common.length = + cpu_to_be16(sizeof(hostdata->caps.reserve)); + hostdata->caps.reserve.common.server_support = + cpu_to_be16(SERVER_SUPPORTS_CAP); + hostdata->caps.reserve.type = + cpu_to_be32(CLIENT_RESERVE_SCSI_2); + req->common.length = + cpu_to_be16(sizeof(hostdata->caps)); } else - req->common.length = sizeof(hostdata->caps) - sizeof(hostdata->caps.reserve); + req->common.length = cpu_to_be16(sizeof(hostdata->caps) - + sizeof(hostdata->caps.reserve)); spin_lock_irqsave(hostdata->host->host_lock, flags); if 
(ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2)) @@ -1297,7 +1314,7 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata) static void fast_fail_rsp(struct srp_event_struct *evt_struct) { struct ibmvscsi_host_data *hostdata = evt_struct->hostdata; - u8 status = evt_struct->xfer_iu->mad.fast_fail.common.status; + u16 status = be16_to_cpu(evt_struct->xfer_iu->mad.fast_fail.common.status); if (status == VIOSRP_MAD_NOT_SUPPORTED) dev_err(hostdata->dev, "fast_fail not supported in server\n"); @@ -1334,8 +1351,8 @@ static int enable_fast_fail(struct ibmvscsi_host_data *hostdata) fast_fail_mad = &evt_struct->iu.mad.fast_fail; memset(fast_fail_mad, 0, sizeof(*fast_fail_mad)); - fast_fail_mad->common.type = VIOSRP_ENABLE_FAST_FAIL; - fast_fail_mad->common.length = sizeof(*fast_fail_mad); + fast_fail_mad->common.type = cpu_to_be32(VIOSRP_ENABLE_FAST_FAIL); + fast_fail_mad->common.length = cpu_to_be16(sizeof(*fast_fail_mad)); spin_lock_irqsave(hostdata->host->host_lock, flags); rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2); @@ -1362,15 +1379,15 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) "host partition %s (%d), OS %d, max io %u\n", hostdata->madapter_info.srp_version, hostdata->madapter_info.partition_name, - hostdata->madapter_info.partition_number, - hostdata->madapter_info.os_type, - hostdata->madapter_info.port_max_txu[0]); + be32_to_cpu(hostdata->madapter_info.partition_number), + be32_to_cpu(hostdata->madapter_info.os_type), + be32_to_cpu(hostdata->madapter_info.port_max_txu[0])); if (hostdata->madapter_info.port_max_txu[0]) hostdata->host->max_sectors = - hostdata->madapter_info.port_max_txu[0] >> 9; + be32_to_cpu(hostdata->madapter_info.port_max_txu[0]) >> 9; - if (hostdata->madapter_info.os_type == 3 && + if (be32_to_cpu(hostdata->madapter_info.os_type) == 3 && strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) { dev_err(hostdata->dev, "host (Ver. 
%s) doesn't support large transfers\n", hostdata->madapter_info.srp_version); @@ -1379,7 +1396,7 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; } - if (hostdata->madapter_info.os_type == 3) { + if (be32_to_cpu(hostdata->madapter_info.os_type) == 3) { enable_fast_fail(hostdata); return; } @@ -1414,9 +1431,9 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) req = &evt_struct->iu.mad.adapter_info; memset(req, 0x00, sizeof(*req)); - req->common.type = VIOSRP_ADAPTER_INFO_TYPE; - req->common.length = sizeof(hostdata->madapter_info); - req->buffer = hostdata->adapter_info_addr; + req->common.type = cpu_to_be32(VIOSRP_ADAPTER_INFO_TYPE); + req->common.length = cpu_to_be16(sizeof(hostdata->madapter_info)); + req->buffer = cpu_to_be64(hostdata->adapter_info_addr); spin_lock_irqsave(hostdata->host->host_lock, flags); if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2)) @@ -1501,7 +1518,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) /* Set up an abort SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = ((u64) lun) << 48; + tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; tsk_mgmt->task_tag = (u64) found_evt; @@ -1624,7 +1641,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) /* Set up a lun reset SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = ((u64) lun) << 48; + tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; evt->sync_srp = &srp_rsp; @@ -1735,8 +1752,9 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, { long rc; unsigned long flags; + /* The hypervisor copies our tag value here so no byteswapping */ struct srp_event_struct *evt_struct = - (struct srp_event_struct *)crq->IU_data_ptr; + (__force struct 
srp_event_struct *)crq->IU_data_ptr; switch (crq->valid) { case 0xC0: /* initialization */ switch (crq->format) { @@ -1792,18 +1810,18 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq, */ if (!valid_event_struct(&hostdata->pool, evt_struct)) { dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n", - (void *)crq->IU_data_ptr); + evt_struct); return; } if (atomic_read(&evt_struct->free)) { dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n", - (void *)crq->IU_data_ptr); + evt_struct); return; } if (crq->format == VIOSRP_SRP_FORMAT) - atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta, + atomic_add(be32_to_cpu(evt_struct->xfer_iu->srp.rsp.req_lim_delta), &hostdata->request_limit); del_timer(&evt_struct->timer); @@ -1856,13 +1874,11 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, /* Set up a lun reset SRP command */ memset(host_config, 0x00, sizeof(*host_config)); - host_config->common.type = VIOSRP_HOST_CONFIG_TYPE; - host_config->common.length = length; - host_config->buffer = addr = dma_map_single(hostdata->dev, buffer, - length, - DMA_BIDIRECTIONAL); + host_config->common.type = cpu_to_be32(VIOSRP_HOST_CONFIG_TYPE); + host_config->common.length = cpu_to_be16(length); + addr = dma_map_single(hostdata->dev, buffer, length, DMA_BIDIRECTIONAL); - if (dma_mapping_error(hostdata->dev, host_config->buffer)) { + if (dma_mapping_error(hostdata->dev, addr)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "dma_mapping error getting host config\n"); @@ -1870,6 +1886,8 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, return -1; } + host_config->buffer = cpu_to_be64(addr); + init_completion(&evt_struct->comp); spin_lock_irqsave(hostdata->host->host_lock, flags); rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2); diff --git a/drivers/scsi/ibmvscsi/viosrp.h b/drivers/scsi/ibmvscsi/viosrp.h index 2cd735d1d196..116243087622 100644 --- 
a/drivers/scsi/ibmvscsi/viosrp.h +++ b/drivers/scsi/ibmvscsi/viosrp.h @@ -75,9 +75,9 @@ struct viosrp_crq { u8 format; /* SCSI vs out-of-band */ u8 reserved; u8 status; /* non-scsi failure? (e.g. DMA failure) */ - u16 timeout; /* in seconds */ - u16 IU_length; /* in bytes */ - u64 IU_data_ptr; /* the TCE for transferring data */ + __be16 timeout; /* in seconds */ + __be16 IU_length; /* in bytes */ + __be64 IU_data_ptr; /* the TCE for transferring data */ }; /* MADs are Management requests above and beyond the IUs defined in the SRP @@ -124,10 +124,10 @@ enum viosrp_capability_flag { * Common MAD header */ struct mad_common { - u32 type; - u16 status; - u16 length; - u64 tag; + __be32 type; + __be16 status; + __be16 length; + __be64 tag; }; /* @@ -139,23 +139,23 @@ struct mad_common { */ struct viosrp_empty_iu { struct mad_common common; - u64 buffer; - u32 port; + __be64 buffer; + __be32 port; }; struct viosrp_error_log { struct mad_common common; - u64 buffer; + __be64 buffer; }; struct viosrp_adapter_info { struct mad_common common; - u64 buffer; + __be64 buffer; }; struct viosrp_host_config { struct mad_common common; - u64 buffer; + __be64 buffer; }; struct viosrp_fast_fail { @@ -164,27 +164,27 @@ struct viosrp_fast_fail { struct viosrp_capabilities { struct mad_common common; - u64 buffer; + __be64 buffer; }; struct mad_capability_common { - u32 cap_type; - u16 length; - u16 server_support; + __be32 cap_type; + __be16 length; + __be16 server_support; }; struct mad_reserve_cap { struct mad_capability_common common; - u32 type; + __be32 type; }; struct mad_migration_cap { struct mad_capability_common common; - u32 ecl; + __be32 ecl; }; struct capabilities{ - u32 flags; + __be32 flags; char name[SRP_MAX_LOC_LEN]; char loc[SRP_MAX_LOC_LEN]; struct mad_migration_cap migration; @@ -208,10 +208,10 @@ union viosrp_iu { struct mad_adapter_info_data { char srp_version[8]; char partition_name[96]; - u32 partition_number; - u32 mad_version; - u32 os_type; - u32 
port_max_txu[8]; /* per-port maximum transfer */ + __be32 partition_number; + __be32 mad_version; + __be32 os_type; + __be32 port_max_txu[8]; /* per-port maximum transfer */ }; #endif diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df43bfe6d573..4e1b75ca7451 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -708,6 +708,7 @@ struct lpfc_hba { uint32_t cfg_multi_ring_type; uint32_t cfg_poll; uint32_t cfg_poll_tmo; + uint32_t cfg_task_mgmt_tmo; uint32_t cfg_use_msi; uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 16498e030c70..00656fc92b93 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1865,8 +1865,10 @@ lpfc_##attr##_set(struct lpfc_vport *vport, uint val) \ { \ if (val >= minval && val <= maxval) {\ lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, \ - "3053 lpfc_" #attr " changed from %d to %d\n", \ - vport->cfg_##attr, val); \ + "3053 lpfc_" #attr \ + " changed from %d (x%x) to %d (x%x)\n", \ + vport->cfg_##attr, vport->cfg_##attr, \ + val, val); \ vport->cfg_##attr = val;\ return 0;\ }\ @@ -4011,8 +4013,11 @@ LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support"); # For [0], FCP commands are issued to Work Queues ina round robin fashion. # For [1], FCP commands are issued to a Work Queue associated with the # current CPU. +# It would be set to 1 by the driver if it's able to set up cpu affinity +# for FCP I/Os through Work Queue associated with the current CPU. Otherwise, +# roundrobin scheduling of FCP I/Os through WQs will be used. 
*/ -LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algrithmn for " +LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algorithm for " "issuing commands [0] - Round Robin, [1] - Current CPU"); /* @@ -4110,6 +4115,12 @@ LPFC_ATTR_RW(poll_tmo, 10, 1, 255, "Milliseconds driver will wait between polling FCP ring"); /* +# lpfc_task_mgmt_tmo: Maximum time to wait for task management commands +# to complete in seconds. Value range is [5,180], default value is 60. +*/ +LPFC_ATTR_RW(task_mgmt_tmo, 60, 5, 180, + "Maximum time to wait for task management commands to complete"); +/* # lpfc_use_msi: Use MSI (Message Signaled Interrupts) in systems that # support this feature # 0 = MSI disabled @@ -4295,6 +4306,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_issue_reset, &dev_attr_lpfc_poll, &dev_attr_lpfc_poll_tmo, + &dev_attr_lpfc_task_mgmt_tmo, &dev_attr_lpfc_use_msi, &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, @@ -5274,6 +5286,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_topology_init(phba, lpfc_topology); lpfc_link_speed_init(phba, lpfc_link_speed); lpfc_poll_tmo_init(phba, lpfc_poll_tmo); + lpfc_task_mgmt_tmo_init(phba, lpfc_task_mgmt_tmo); lpfc_enable_npiv_init(phba, lpfc_enable_npiv); lpfc_fcf_failover_policy_init(phba, lpfc_fcf_failover_policy); lpfc_enable_rrq_init(phba, lpfc_enable_rrq); diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 79c13c3263f1..b92aec989d60 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -317,6 +317,11 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, } spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + /* Close the timeout handler abort window */ + spin_lock_irqsave(&phba->hbalock, flags); + cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; + spin_unlock_irqrestore(&phba->hbalock, flags); + iocb = &dd_data->context_un.iocb; ndlp = iocb->ndlp; rmp = iocb->rmp; @@ -387,6 +392,7 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job) int 
request_nseg; int reply_nseg; struct bsg_job_data *dd_data; + unsigned long flags; uint32_t creg_val; int rc = 0; int iocb_stat; @@ -501,14 +507,24 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job) } iocb_stat = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, cmdiocbq, 0); - if (iocb_stat == IOCB_SUCCESS) + + if (iocb_stat == IOCB_SUCCESS) { + spin_lock_irqsave(&phba->hbalock, flags); + /* make sure the I/O had not been completed yet */ + if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) { + /* open up abort window to timeout handler */ + cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; + } + spin_unlock_irqrestore(&phba->hbalock, flags); return 0; /* done for now */ - else if (iocb_stat == IOCB_BUSY) + } else if (iocb_stat == IOCB_BUSY) { rc = -EAGAIN; - else + } else { rc = -EIO; + } /* iocb failed so cleanup */ + job->dd_data = NULL; free_rmp: lpfc_free_bsg_buffers(phba, rmp); @@ -577,6 +593,11 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba, } spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + /* Close the timeout handler abort window */ + spin_lock_irqsave(&phba->hbalock, flags); + cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; + spin_unlock_irqrestore(&phba->hbalock, flags); + rsp = &rspiocbq->iocb; pcmd = (struct lpfc_dmabuf *)cmdiocbq->context2; prsp = (struct lpfc_dmabuf *)pcmd->list.next; @@ -639,6 +660,7 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job) struct lpfc_iocbq *cmdiocbq; uint16_t rpi = 0; struct bsg_job_data *dd_data; + unsigned long flags; uint32_t creg_val; int rc = 0; @@ -721,15 +743,25 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job) rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, cmdiocbq, 0); - if (rc == IOCB_SUCCESS) + if (rc == IOCB_SUCCESS) { + spin_lock_irqsave(&phba->hbalock, flags); + /* make sure the I/O had not been completed/released */ + if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) { + /* open up abort window to timeout handler */ + cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; + } + spin_unlock_irqrestore(&phba->hbalock, flags); return 0; /* done 
for now */ - else if (rc == IOCB_BUSY) + } else if (rc == IOCB_BUSY) { rc = -EAGAIN; - else + } else { rc = -EIO; + } -linkdown_err: + /* iocb failed so cleanup */ + job->dd_data = NULL; +linkdown_err: cmdiocbq->context1 = ndlp; lpfc_els_free_iocb(phba, cmdiocbq); @@ -1249,7 +1281,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job) struct lpfc_hba *phba = vport->phba; struct get_ct_event *event_req; struct get_ct_event_reply *event_reply; - struct lpfc_bsg_event *evt; + struct lpfc_bsg_event *evt, *evt_next; struct event_data *evt_dat = NULL; unsigned long flags; uint32_t rc = 0; @@ -1269,7 +1301,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job) event_reply = (struct get_ct_event_reply *) job->reply->reply_data.vendor_reply.vendor_rsp; spin_lock_irqsave(&phba->ct_ev_lock, flags); - list_for_each_entry(evt, &phba->ct_ev_waiters, node) { + list_for_each_entry_safe(evt, evt_next, &phba->ct_ev_waiters, node) { if (evt->reg_id == event_req->ev_reg_id) { if (list_empty(&evt->events_to_get)) break; @@ -1370,6 +1402,11 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *phba, } spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + /* Close the timeout handler abort window */ + spin_lock_irqsave(&phba->hbalock, flags); + cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; + spin_unlock_irqrestore(&phba->hbalock, flags); + ndlp = dd_data->context_un.iocb.ndlp; cmp = cmdiocbq->context2; bmp = cmdiocbq->context3; @@ -1433,6 +1470,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, int rc = 0; struct lpfc_nodelist *ndlp = NULL; struct bsg_job_data *dd_data; + unsigned long flags; uint32_t creg_val; /* allocate our bsg tracking structure */ @@ -1542,8 +1580,19 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); - if (rc == IOCB_SUCCESS) + if (rc == IOCB_SUCCESS) { + spin_lock_irqsave(&phba->hbalock, flags); + /* make sure the I/O had not been completed/released */ + if 
(ctiocb->iocb_flag & LPFC_IO_LIBDFC) { + /* open up abort window to timeout handler */ + ctiocb->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; + } + spin_unlock_irqrestore(&phba->hbalock, flags); return 0; /* done for now */ + } + + /* iocb failed so cleanup */ + job->dd_data = NULL; issue_ct_rsp_exit: lpfc_sli_release_iocbq(phba, ctiocb); @@ -5284,9 +5333,15 @@ lpfc_bsg_timeout(struct fc_bsg_job *job) * remove it from the txq queue and call cancel iocbs. * Otherwise, call abort iotag */ - cmdiocb = dd_data->context_un.iocb.cmdiocbq; - spin_lock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + + spin_lock_irqsave(&phba->hbalock, flags); + /* make sure the I/O abort window is still open */ + if (!(cmdiocb->iocb_flag & LPFC_IO_CMD_OUTSTANDING)) { + spin_unlock_irqrestore(&phba->hbalock, flags); + return -EAGAIN; + } list_for_each_entry_safe(check_iocb, next_iocb, &pring->txq, list) { if (check_iocb == cmdiocb) { @@ -5296,8 +5351,7 @@ lpfc_bsg_timeout(struct fc_bsg_job *job) } if (list_empty(&completions)) lpfc_sli_issue_abort_iotag(phba, pring, cmdiocb); - spin_unlock_irq(&phba->hbalock); - spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + spin_unlock_irqrestore(&phba->hbalock, flags); if (!list_empty(&completions)) { lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, @@ -5321,9 +5375,10 @@ lpfc_bsg_timeout(struct fc_bsg_job *job) * remove it from the txq queue and call cancel iocbs. * Otherwise, call abort iotag. 
*/ - cmdiocb = dd_data->context_un.menlo.cmdiocbq; - spin_lock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + + spin_lock_irqsave(&phba->hbalock, flags); list_for_each_entry_safe(check_iocb, next_iocb, &pring->txq, list) { if (check_iocb == cmdiocb) { @@ -5333,8 +5388,7 @@ lpfc_bsg_timeout(struct fc_bsg_job *job) } if (list_empty(&completions)) lpfc_sli_issue_abort_iotag(phba, pring, cmdiocb); - spin_unlock_irq(&phba->hbalock); - spin_unlock_irqrestore(&phba->ct_ev_lock, flags); + spin_unlock_irqrestore(&phba->hbalock, flags); if (!list_empty(&completions)) { lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 60d6ca2f68c2..7801601aa5d9 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4437,6 +4437,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (!ndlp) return; lpfc_issue_els_logo(vport, ndlp, 0); + mempool_free(pmb, phba->mbox_mem_pool); } /* @@ -4456,7 +4457,15 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) int rc; uint16_t rpi; - if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { + if (ndlp->nlp_flag & NLP_RPI_REGISTERED || + ndlp->nlp_flag & NLP_REG_LOGIN_SEND) { + if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) + lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + "3366 RPI x%x needs to be " + "unregistered nlp_flag x%x " + "did x%x\n", + ndlp->nlp_rpi, ndlp->nlp_flag, + ndlp->nlp_DID); mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { /* SLI4 ports require the physical rpi value. 
*/ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 501147c4a147..647f5bfb3bd3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3031,10 +3031,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) phba->sli4_hba.scsi_xri_max); spin_lock_irq(&phba->scsi_buf_list_get_lock); - spin_lock_irq(&phba->scsi_buf_list_put_lock); + spin_lock(&phba->scsi_buf_list_put_lock); list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list); list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list); - spin_unlock_irq(&phba->scsi_buf_list_put_lock); + spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) { @@ -3070,10 +3070,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri]; } spin_lock_irq(&phba->scsi_buf_list_get_lock); - spin_lock_irq(&phba->scsi_buf_list_put_lock); + spin_lock(&phba->scsi_buf_list_put_lock); list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); - spin_unlock_irq(&phba->scsi_buf_list_put_lock); + spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); return 0; @@ -4859,6 +4859,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) struct lpfc_mqe *mqe; int longs; + /* Get all the module params for configuring this host */ + lpfc_get_cfgparam(phba); + /* Before proceed, wait for POST done and device ready */ rc = lpfc_sli4_post_status_check(phba); if (rc) @@ -4902,15 +4905,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) sizeof(struct lpfc_mbox_ext_buf_ctx)); INIT_LIST_HEAD(&phba->mbox_ext_buf_ctx.ext_dmabuf_list); - /* - * We need to do a READ_CONFIG mailbox command here before - * calling lpfc_get_cfgparam. For VFs this will report the - * MAX_XRI, MAX_VPI, MAX_RPI, MAX_IOCB, and MAX_VFI settings. 
- * All of the resources allocated - * for this Port are tied to these values. - */ - /* Get all the module params for configuring this host */ - lpfc_get_cfgparam(phba); phba->max_vpi = LPFC_MAX_VPI; /* This will be set to correct value after the read_config mbox */ @@ -7141,19 +7135,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } - if (phba->pci_bar0_memmap_p) { - iounmap(phba->pci_bar0_memmap_p); - phba->pci_bar0_memmap_p = NULL; - } - if (phba->pci_bar2_memmap_p) { - iounmap(phba->pci_bar2_memmap_p); - phba->pci_bar2_memmap_p = NULL; - } - if (phba->pci_bar4_memmap_p) { - iounmap(phba->pci_bar4_memmap_p); - phba->pci_bar4_memmap_p = NULL; - } - /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7942,9 +7923,9 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) * particular PCI BARs regions is dependent on the type of * SLI4 device. */ - if (pci_resource_start(pdev, 0)) { - phba->pci_bar0_map = pci_resource_start(pdev, 0); - bar0map_len = pci_resource_len(pdev, 0); + if (pci_resource_start(pdev, PCI_64BIT_BAR0)) { + phba->pci_bar0_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar0map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); /* * Map SLI4 PCI Config Space Register base to a kernel virtual @@ -7958,6 +7939,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) "registers.\n"); goto out; } + phba->pci_bar0_memmap_p = phba->sli4_hba.conf_regs_memmap_p; /* Set up BAR0 PCI config space register memory map */ lpfc_sli4_bar0_register_memmap(phba, if_type); } else { @@ -7980,13 +7962,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) } if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) && - (pci_resource_start(pdev, 2))) { + (pci_resource_start(pdev, PCI_64BIT_BAR2))) { /* * Map SLI4 if type 0 HBA Control Register base to a kernel * virtual address and setup the registers. 
*/ - phba->pci_bar1_map = pci_resource_start(pdev, 2); - bar1map_len = pci_resource_len(pdev, 2); + phba->pci_bar1_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar1map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); phba->sli4_hba.ctrl_regs_memmap_p = ioremap(phba->pci_bar1_map, bar1map_len); if (!phba->sli4_hba.ctrl_regs_memmap_p) { @@ -7994,17 +7976,18 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) "ioremap failed for SLI4 HBA control registers.\n"); goto out_iounmap_conf; } + phba->pci_bar2_memmap_p = phba->sli4_hba.ctrl_regs_memmap_p; lpfc_sli4_bar1_register_memmap(phba); } if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) && - (pci_resource_start(pdev, 4))) { + (pci_resource_start(pdev, PCI_64BIT_BAR4))) { /* * Map SLI4 if type 0 HBA Doorbell Register base to a kernel * virtual address and setup the registers. */ - phba->pci_bar2_map = pci_resource_start(pdev, 4); - bar2map_len = pci_resource_len(pdev, 4); + phba->pci_bar2_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); phba->sli4_hba.drbl_regs_memmap_p = ioremap(phba->pci_bar2_map, bar2map_len); if (!phba->sli4_hba.drbl_regs_memmap_p) { @@ -8012,6 +7995,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba) "ioremap failed for SLI4 HBA doorbell registers.\n"); goto out_iounmap_ctrl; } + phba->pci_bar4_memmap_p = phba->sli4_hba.drbl_regs_memmap_p; error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0); if (error) goto out_iounmap_all; @@ -8405,7 +8389,8 @@ static int lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) { int i, idx, saved_chann, used_chann, cpu, phys_id; - int max_phys_id, num_io_channel, first_cpu; + int max_phys_id, min_phys_id; + int num_io_channel, first_cpu, chan; struct lpfc_vector_map_info *cpup; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; @@ -8423,6 +8408,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) phba->sli4_hba.num_present_cpu)); max_phys_id = 0; + min_phys_id = 0xff; phys_id = 0; num_io_channel = 0; 
first_cpu = LPFC_VECTOR_MAP_EMPTY; @@ -8446,9 +8432,12 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) if (cpup->phys_id > max_phys_id) max_phys_id = cpup->phys_id; + if (cpup->phys_id < min_phys_id) + min_phys_id = cpup->phys_id; cpup++; } + phys_id = min_phys_id; /* Now associate the HBA vectors with specific CPUs */ for (idx = 0; idx < vectors; idx++) { cpup = phba->sli4_hba.cpu_map; @@ -8459,13 +8448,25 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) for (i = 1; i < max_phys_id; i++) { phys_id++; if (phys_id > max_phys_id) - phys_id = 0; + phys_id = min_phys_id; cpu = lpfc_find_next_cpu(phba, phys_id); if (cpu == LPFC_VECTOR_MAP_EMPTY) continue; goto found; } + /* Use round robin for scheduling */ + phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_ROUND_ROBIN; + chan = 0; + cpup = phba->sli4_hba.cpu_map; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + cpup->channel_id = chan; + cpup++; + chan++; + if (chan >= phba->cfg_fcp_io_channel) + chan = 0; + } + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3329 Cannot set affinity:" "Error mapping vector %d (%d)\n", @@ -8503,7 +8504,7 @@ found: /* Spread vector mapping across multple physical CPU nodes */ phys_id++; if (phys_id > max_phys_id) - phys_id = 0; + phys_id = min_phys_id; } /* @@ -8513,7 +8514,7 @@ found: * Base the remaining IO channel assigned, to IO channels already * assigned to other CPUs on the same phys_id. */ - for (i = 0; i <= max_phys_id; i++) { + for (i = min_phys_id; i <= max_phys_id; i++) { /* * If there are no io channels already mapped to * this phys_id, just round robin thru the io_channels. 
@@ -8595,10 +8596,11 @@ out: if (num_io_channel != phba->sli4_hba.num_present_cpu) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3333 Set affinity mismatch:" - "%d chann != %d cpus: %d vactors\n", + "%d chann != %d cpus: %d vectors\n", num_io_channel, phba->sli4_hba.num_present_cpu, vectors); + /* Enable using cpu affinity for scheduling */ phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_BY_CPU; return 1; } @@ -8689,9 +8691,12 @@ enable_msix_vectors: cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) + for (--index; index >= 0; index--) { + irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. + vector, NULL); free_irq(phba->sli4_hba.msix_entries[index].vector, &phba->sli4_hba.fcp_eq_hdl[index]); + } msi_fail_out: /* Unconfigure MSI-X capability structure */ @@ -8712,9 +8717,12 @@ lpfc_sli4_disable_msix(struct lpfc_hba *phba) int index; /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) + for (index = 0; index < phba->cfg_fcp_io_channel; index++) { + irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. 
+ vector, NULL); free_irq(phba->sli4_hba.msix_entries[index].vector, &phba->sli4_hba.fcp_eq_hdl[index]); + } /* Disable MSI-X */ pci_disable_msix(phba->pcidev); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 1242b6c4308b..c913e8cc3b26 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -926,10 +926,10 @@ lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba) /* get all SCSI buffers need to repost to a local list */ spin_lock_irq(&phba->scsi_buf_list_get_lock); - spin_lock_irq(&phba->scsi_buf_list_put_lock); + spin_lock(&phba->scsi_buf_list_put_lock); list_splice_init(&phba->lpfc_scsi_buf_list_get, &post_sblist); list_splice(&phba->lpfc_scsi_buf_list_put, &post_sblist); - spin_unlock_irq(&phba->scsi_buf_list_put_lock); + spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); /* post the list of scsi buffer sgls to port if available */ @@ -1000,9 +1000,12 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) } memset(psb->data, 0, phba->cfg_sg_dma_buf_size); - /* Page alignment is CRITICAL, double check to be sure */ - if (((unsigned long)(psb->data) & - (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0) { + /* + * 4K Page alignment is CRITICAL to BlockGuard, double check + * to be sure. 
+ */ + if (phba->cfg_enable_bg && (((unsigned long)(psb->data) & + (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { pci_pool_free(phba->lpfc_scsi_dma_buf_pool, psb->data, psb->dma_handle); kfree(psb); @@ -1134,22 +1137,21 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { struct lpfc_scsi_buf * lpfc_cmd = NULL; struct list_head *scsi_buf_list_get = &phba->lpfc_scsi_buf_list_get; - unsigned long gflag = 0; - unsigned long pflag = 0; + unsigned long iflag = 0; - spin_lock_irqsave(&phba->scsi_buf_list_get_lock, gflag); + spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag); list_remove_head(scsi_buf_list_get, lpfc_cmd, struct lpfc_scsi_buf, list); if (!lpfc_cmd) { - spin_lock_irqsave(&phba->scsi_buf_list_put_lock, pflag); + spin_lock(&phba->scsi_buf_list_put_lock); list_splice(&phba->lpfc_scsi_buf_list_put, &phba->lpfc_scsi_buf_list_get); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); list_remove_head(scsi_buf_list_get, lpfc_cmd, struct lpfc_scsi_buf, list); - spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, pflag); + spin_unlock(&phba->scsi_buf_list_put_lock); } - spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, gflag); + spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag); return lpfc_cmd; } /** @@ -1167,11 +1169,10 @@ static struct lpfc_scsi_buf* lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { struct lpfc_scsi_buf *lpfc_cmd, *lpfc_cmd_next; - unsigned long gflag = 0; - unsigned long pflag = 0; + unsigned long iflag = 0; int found = 0; - spin_lock_irqsave(&phba->scsi_buf_list_get_lock, gflag); + spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag); list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next, &phba->lpfc_scsi_buf_list_get, list) { if (lpfc_test_rrq_active(phba, ndlp, @@ -1182,11 +1183,11 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) break; } if (!found) { - spin_lock_irqsave(&phba->scsi_buf_list_put_lock, pflag); + spin_lock(&phba->scsi_buf_list_put_lock); 
list_splice(&phba->lpfc_scsi_buf_list_put, &phba->lpfc_scsi_buf_list_get); INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); - spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, pflag); + spin_unlock(&phba->scsi_buf_list_put_lock); list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next, &phba->lpfc_scsi_buf_list_get, list) { if (lpfc_test_rrq_active( @@ -1197,7 +1198,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) break; } } - spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, gflag); + spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag); if (!found) return NULL; return lpfc_cmd; @@ -3966,11 +3967,11 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, /* * Check SLI validation that all the transfer was actually done - * (fcpi_parm should be zero). + * (fcpi_parm should be zero). Apply check only to reads. */ - } else if (fcpi_parm) { + } else if (fcpi_parm && (cmnd->sc_data_direction == DMA_FROM_DEVICE)) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR, - "9029 FCP Data Transfer Check Error: " + "9029 FCP Read Check Error Data: " "x%x x%x x%x x%x x%x\n", be32_to_cpu(fcpcmd->fcpDl), be32_to_cpu(fcprsp->rspResId), @@ -4342,6 +4343,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, char tag[2]; uint8_t *ptr; bool sli4; + uint32_t fcpdl; if (!pnode || !NLP_CHK_NODE_ACT(pnode)) return; @@ -4389,8 +4391,12 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, iocb_cmd->ulpPU = PARM_READ_CHECK; if (vport->cfg_first_burst_size && (pnode->nlp_flag & NLP_FIRSTBURST)) { - piocbq->iocb.un.fcpi.fcpi_XRdy = - vport->cfg_first_burst_size; + fcpdl = scsi_bufflen(scsi_cmnd); + if (fcpdl < vport->cfg_first_burst_size) + piocbq->iocb.un.fcpi.fcpi_XRdy = fcpdl; + else + piocbq->iocb.un.fcpi.fcpi_XRdy = + vport->cfg_first_burst_size; } fcp_cmnd->fcpCntl3 = WRITE_DATA; phba->fc4OutputRequests++; @@ -4878,6 +4884,9 @@ lpfc_abort_handler(struct 
scsi_cmnd *cmnd) goto out_unlock; } + /* Indicate the IO is being aborted by the driver. */ + iocb->iocb_flag |= LPFC_DRIVER_ABORTED; + /* * The scsi command can not be in txq and it is in flight because the * pCmd is still pointig at the SCSI command we have to abort. There @@ -5006,7 +5015,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata, lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode); if (lpfc_cmd == NULL) return FAILED; - lpfc_cmd->timeout = 60; + lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo; lpfc_cmd->rdata = rdata; status = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun_id, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 0392e114531c..612f48973ff2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -9831,6 +9831,13 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abort_cmd) != 0) continue; + /* + * If the iocbq is already being aborted, don't take a second + * action, but do count it. + */ + if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED) + continue; + /* issue ABTS for this IOCB based on iotag */ abtsiocb = lpfc_sli_get_iocbq(phba); if (abtsiocb == NULL) { @@ -9838,6 +9845,9 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, continue; } + /* indicate the IO is being aborted by the driver. 
*/ + iocbq->iocb_flag |= LPFC_DRIVER_ABORTED; + cmd = &iocbq->iocb; abtsiocb->iocb.un.acxri.abortType = ABORT_TYPE_ABTS; abtsiocb->iocb.un.acxri.abortContextTag = cmd->ulpContext; @@ -9847,7 +9857,7 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag; abtsiocb->iocb.ulpLe = 1; abtsiocb->iocb.ulpClass = cmd->ulpClass; - abtsiocb->vport = phba->pport; + abtsiocb->vport = vport; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abtsiocb->fcp_wqidx = iocbq->fcp_wqidx; @@ -12233,7 +12243,6 @@ static void __iomem * lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) { struct pci_dev *pdev; - unsigned long bar_map, bar_map_len; if (!phba->pcidev) return NULL; @@ -12242,25 +12251,10 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) switch (pci_barset) { case WQ_PCI_BAR_0_AND_1: - if (!phba->pci_bar0_memmap_p) { - bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); - bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); - phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); - } return phba->pci_bar0_memmap_p; case WQ_PCI_BAR_2_AND_3: - if (!phba->pci_bar2_memmap_p) { - bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); - bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); - phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); - } return phba->pci_bar2_memmap_p; case WQ_PCI_BAR_4_AND_5: - if (!phba->pci_bar4_memmap_p) { - bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); - bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); - phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); - } return phba->pci_bar4_memmap_p; default: break; @@ -15808,7 +15802,7 @@ lpfc_sli4_fcf_rr_index_set(struct lpfc_hba *phba, uint16_t fcf_index) void lpfc_sli4_fcf_rr_index_clear(struct lpfc_hba *phba, uint16_t fcf_index) { - struct lpfc_fcf_pri *fcf_pri; + struct lpfc_fcf_pri *fcf_pri, *fcf_pri_next; if (fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) { 
lpfc_printf_log(phba, KERN_ERR, LOG_FIP, "2762 FCF (x%x) reached driver's book " @@ -15818,7 +15812,8 @@ lpfc_sli4_fcf_rr_index_clear(struct lpfc_hba *phba, uint16_t fcf_index) } /* Clear the eligible FCF record index bmask */ spin_lock_irq(&phba->hbalock); - list_for_each_entry(fcf_pri, &phba->fcf.fcf_pri_list, list) { + list_for_each_entry_safe(fcf_pri, fcf_pri_next, &phba->fcf.fcf_pri_list, + list) { if (fcf_pri->fcf_rec.fcf_index == fcf_index) { list_del_init(&fcf_pri->list); break; diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 97617996206d..6b0f2478706e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -58,7 +58,7 @@ struct lpfc_iocbq { IOCB_t iocb; /* IOCB cmd */ uint8_t retry; /* retry counter for IOCB cmd - if needed */ - uint16_t iocb_flag; + uint32_t iocb_flag; #define LPFC_IO_LIBDFC 1 /* libdfc iocb */ #define LPFC_IO_WAKE 2 /* Synchronous I/O completed */ #define LPFC_IO_WAKE_TMO LPFC_IO_WAKE /* Synchronous I/O timed out */ @@ -73,11 +73,11 @@ struct lpfc_iocbq { #define LPFC_IO_DIF_PASS 0x400 /* T10 DIF IO pass-thru prot */ #define LPFC_IO_DIF_STRIP 0x800 /* T10 DIF IO strip prot */ #define LPFC_IO_DIF_INSERT 0x1000 /* T10 DIF IO insert prot */ +#define LPFC_IO_CMD_OUTSTANDING 0x2000 /* timeout handler abort window */ #define LPFC_FIP_ELS_ID_MASK 0xc000 /* ELS_ID range 0-3, non-shifted mask */ #define LPFC_FIP_ELS_ID_SHIFT 14 - uint8_t rsvd2; uint32_t drvrTimeout; /* driver timeout in seconds */ uint32_t fcp_wqidx; /* index to FCP work queue */ struct lpfc_vport *vport;/* virtual port pointer */ diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 5bcc38223ac9..85120b77aa0e 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -523,7 +523,7 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ - uint8_t fw_func_mode; /* FW function 
protocol mode */ + uint32_t fw_func_mode; /* FW function protocol mode */ uint32_t ulp0_mode; /* ULP0 protocol mode */ uint32_t ulp1_mode; /* ULP1 protocol mode */ diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 21859d2006ce..f58f18342bc3 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.41" +#define LPFC_DRIVER_VERSION "8.3.42" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 04a42a505852..0c73ba4bf451 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -33,9 +33,9 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "06.600.18.00-rc1" -#define MEGASAS_RELDATE "May. 15, 2013" -#define MEGASAS_EXT_VERSION "Wed. May. 15 17:00:00 PDT 2013" +#define MEGASAS_VERSION "06.700.06.00-rc1" +#define MEGASAS_RELDATE "Aug. 31, 2013" +#define MEGASAS_EXT_VERSION "Sat. Aug. 
31 17:00:00 PDT 2013" /* * Device IDs @@ -170,6 +170,7 @@ #define MR_DCMD_CTRL_GET_INFO 0x01010000 #define MR_DCMD_LD_GET_LIST 0x03010000 +#define MR_DCMD_LD_LIST_QUERY 0x03010100 #define MR_DCMD_CTRL_CACHE_FLUSH 0x01101000 #define MR_FLUSH_CTRL_CACHE 0x01 @@ -345,6 +346,15 @@ enum MR_PD_QUERY_TYPE { MR_PD_QUERY_TYPE_EXPOSED_TO_HOST = 5, }; +enum MR_LD_QUERY_TYPE { + MR_LD_QUERY_TYPE_ALL = 0, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST = 1, + MR_LD_QUERY_TYPE_USED_TGT_IDS = 2, + MR_LD_QUERY_TYPE_CLUSTER_ACCESS = 3, + MR_LD_QUERY_TYPE_CLUSTER_LOCALE = 4, +}; + + #define MR_EVT_CFG_CLEARED 0x0004 #define MR_EVT_LD_STATE_CHANGE 0x0051 #define MR_EVT_PD_INSERTED 0x005b @@ -435,6 +445,14 @@ struct MR_LD_LIST { } ldList[MAX_LOGICAL_DRIVES]; } __packed; +struct MR_LD_TARGETID_LIST { + u32 size; + u32 count; + u8 pad[3]; + u8 targetId[MAX_LOGICAL_DRIVES]; +}; + + /* * SAS controller properties */ @@ -474,21 +492,39 @@ struct megasas_ctrl_prop { * a bit in the following structure. */ struct { - u32 copyBackDisabled : 1; - u32 SMARTerEnabled : 1; - u32 prCorrectUnconfiguredAreas : 1; - u32 useFdeOnly : 1; - u32 disableNCQ : 1; - u32 SSDSMARTerEnabled : 1; - u32 SSDPatrolReadEnabled : 1; - u32 enableSpinDownUnconfigured : 1; - u32 autoEnhancedImport : 1; - u32 enableSecretKeyControl : 1; - u32 disableOnlineCtrlReset : 1; - u32 allowBootWithPinnedCache : 1; - u32 disableSpinDownHS : 1; - u32 enableJBOD : 1; - u32 reserved :18; +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved:18; + u32 enableJBOD:1; + u32 disableSpinDownHS:1; + u32 allowBootWithPinnedCache:1; + u32 disableOnlineCtrlReset:1; + u32 enableSecretKeyControl:1; + u32 autoEnhancedImport:1; + u32 enableSpinDownUnconfigured:1; + u32 SSDPatrolReadEnabled:1; + u32 SSDSMARTerEnabled:1; + u32 disableNCQ:1; + u32 useFdeOnly:1; + u32 prCorrectUnconfiguredAreas:1; + u32 SMARTerEnabled:1; + u32 copyBackDisabled:1; +#else + u32 copyBackDisabled:1; + u32 SMARTerEnabled:1; + u32 prCorrectUnconfiguredAreas:1; + u32 useFdeOnly:1; + u32 
disableNCQ:1; + u32 SSDSMARTerEnabled:1; + u32 SSDPatrolReadEnabled:1; + u32 enableSpinDownUnconfigured:1; + u32 autoEnhancedImport:1; + u32 enableSecretKeyControl:1; + u32 disableOnlineCtrlReset:1; + u32 allowBootWithPinnedCache:1; + u32 disableSpinDownHS:1; + u32 enableJBOD:1; + u32 reserved:18; +#endif } OnOffProperties; u8 autoSnapVDSpace; u8 viewSpace; @@ -802,6 +838,30 @@ struct megasas_ctrl_info { u16 cacheMemorySize; /*7A2h */ struct { /*7A4h */ +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved:11; + u32 supportUnevenSpans:1; + u32 dedicatedHotSparesLimited:1; + u32 headlessMode:1; + u32 supportEmulatedDrives:1; + u32 supportResetNow:1; + u32 realTimeScheduler:1; + u32 supportSSDPatrolRead:1; + u32 supportPerfTuning:1; + u32 disableOnlinePFKChange:1; + u32 supportJBOD:1; + u32 supportBootTimePFKChange:1; + u32 supportSetLinkSpeed:1; + u32 supportEmergencySpares:1; + u32 supportSuspendResumeBGops:1; + u32 blockSSDWriteCacheChange:1; + u32 supportShieldState:1; + u32 supportLdBBMInfo:1; + u32 supportLdPIType3:1; + u32 supportLdPIType2:1; + u32 supportLdPIType1:1; + u32 supportPIcontroller:1; +#else u32 supportPIcontroller:1; u32 supportLdPIType1:1; u32 supportLdPIType2:1; @@ -827,6 +887,7 @@ struct megasas_ctrl_info { u32 supportUnevenSpans:1; u32 reserved:11; +#endif } adapterOperations2; u8 driverVersion[32]; /*7A8h */ @@ -863,7 +924,7 @@ struct megasas_ctrl_info { * =============================== */ #define MEGASAS_MAX_PD_CHANNELS 2 -#define MEGASAS_MAX_LD_CHANNELS 2 +#define MEGASAS_MAX_LD_CHANNELS 1 #define MEGASAS_MAX_CHANNELS (MEGASAS_MAX_PD_CHANNELS + \ MEGASAS_MAX_LD_CHANNELS) #define MEGASAS_MAX_DEV_PER_CHANNEL 128 @@ -1051,9 +1112,15 @@ union megasas_sgl_frame { typedef union _MFI_CAPABILITIES { struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved:30; + u32 support_additional_msix:1; + u32 support_fp_remote_lun:1; +#else u32 support_fp_remote_lun:1; u32 support_additional_msix:1; u32 reserved:30; +#endif } mfi_capabilities; u32 reg; } 
MFI_CAPABILITIES; @@ -1656,4 +1723,16 @@ struct megasas_mgmt_info { int max_index; }; +u8 +MR_BuildRaidContext(struct megasas_instance *instance, + struct IO_REQUEST_INFO *io_info, + struct RAID_CONTEXT *pRAID_Context, + struct MR_FW_RAID_MAP_ALL *map, u8 **raidLUN); +u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map); +struct MR_LD_RAID *MR_LdRaidGet(u32 ld, struct MR_FW_RAID_MAP_ALL *map); +u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map); +u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map); +u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map); +u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map); + #endif /*LSI_MEGARAID_SAS_H */ diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 1f0ca68409d4..3020921a4746 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -18,7 +18,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * FILE: megaraid_sas_base.c - * Version : 06.600.18.00-rc1 + * Version : 06.700.06.00-rc1 * * Authors: LSI Corporation * Sreenivas Bagalkote @@ -92,6 +92,8 @@ MODULE_DESCRIPTION("LSI MegaRAID SAS Driver"); int megasas_transition_to_ready(struct megasas_instance *instance, int ocr); static int megasas_get_pd_list(struct megasas_instance *instance); +static int megasas_ld_list_query(struct megasas_instance *instance, + u8 query_type); static int megasas_issue_init_mfi(struct megasas_instance *instance); static int megasas_register_aen(struct megasas_instance *instance, u32 seq_num, u32 class_locale_word); @@ -374,13 +376,11 @@ static int megasas_check_reset_xscale(struct megasas_instance *instance, struct megasas_register_set __iomem *regs) { - u32 consumer; - consumer = *instance->consumer; if ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) && - (*instance->consumer == MEGASAS_ADPRESET_INPROG_SIGN)) { + (le32_to_cpu(*instance->consumer) == + 
MEGASAS_ADPRESET_INPROG_SIGN)) return 1; - } return 0; } @@ -629,9 +629,10 @@ megasas_fire_cmd_skinny(struct megasas_instance *instance, { unsigned long flags; spin_lock_irqsave(&instance->hba_lock, flags); - writel(0, &(regs)->inbound_high_queue_port); - writel((frame_phys_addr | (frame_count<<1))|1, - &(regs)->inbound_low_queue_port); + writel(upper_32_bits(frame_phys_addr), + &(regs)->inbound_high_queue_port); + writel((lower_32_bits(frame_phys_addr) | (frame_count<<1))|1, + &(regs)->inbound_low_queue_port); spin_unlock_irqrestore(&instance->hba_lock, flags); } @@ -879,8 +880,8 @@ megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd) struct megasas_header *frame_hdr = &cmd->frame->hdr; - frame_hdr->cmd_status = 0xFF; - frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; + frame_hdr->cmd_status = MFI_CMD_STATUS_POLL_MODE; + frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE); /* * Issue the frame using inbound queue port @@ -944,10 +945,12 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, */ abort_fr->cmd = MFI_CMD_ABORT; abort_fr->cmd_status = 0xFF; - abort_fr->flags = 0; - abort_fr->abort_context = cmd_to_abort->index; - abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr; - abort_fr->abort_mfi_phys_addr_hi = 0; + abort_fr->flags = cpu_to_le16(0); + abort_fr->abort_context = cpu_to_le32(cmd_to_abort->index); + abort_fr->abort_mfi_phys_addr_lo = + cpu_to_le32(lower_32_bits(cmd_to_abort->frame_phys_addr)); + abort_fr->abort_mfi_phys_addr_hi = + cpu_to_le32(upper_32_bits(cmd_to_abort->frame_phys_addr)); cmd->sync_cmd = 1; cmd->cmd_status = 0xFF; @@ -986,8 +989,8 @@ megasas_make_sgl32(struct megasas_instance *instance, struct scsi_cmnd *scp, if (sge_count) { scsi_for_each_sg(scp, os_sgl, sge_count, i) { - mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); - mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); + mfi_sgl->sge32[i].length = cpu_to_le32(sg_dma_len(os_sgl)); + 
mfi_sgl->sge32[i].phys_addr = cpu_to_le32(sg_dma_address(os_sgl)); } } return sge_count; @@ -1015,8 +1018,8 @@ megasas_make_sgl64(struct megasas_instance *instance, struct scsi_cmnd *scp, if (sge_count) { scsi_for_each_sg(scp, os_sgl, sge_count, i) { - mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); - mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); + mfi_sgl->sge64[i].length = cpu_to_le32(sg_dma_len(os_sgl)); + mfi_sgl->sge64[i].phys_addr = cpu_to_le64(sg_dma_address(os_sgl)); } } return sge_count; @@ -1043,10 +1046,11 @@ megasas_make_sgl_skinny(struct megasas_instance *instance, if (sge_count) { scsi_for_each_sg(scp, os_sgl, sge_count, i) { - mfi_sgl->sge_skinny[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge_skinny[i].length = + cpu_to_le32(sg_dma_len(os_sgl)); mfi_sgl->sge_skinny[i].phys_addr = - sg_dma_address(os_sgl); - mfi_sgl->sge_skinny[i].flag = 0; + cpu_to_le64(sg_dma_address(os_sgl)); + mfi_sgl->sge_skinny[i].flag = cpu_to_le32(0); } } return sge_count; @@ -1155,8 +1159,8 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp, pthru->cdb_len = scp->cmd_len; pthru->timeout = 0; pthru->pad_0 = 0; - pthru->flags = flags; - pthru->data_xfer_len = scsi_bufflen(scp); + pthru->flags = cpu_to_le16(flags); + pthru->data_xfer_len = cpu_to_le32(scsi_bufflen(scp)); memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); @@ -1168,18 +1172,18 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp, if ((scp->request->timeout / HZ) > 0xFFFF) pthru->timeout = 0xFFFF; else - pthru->timeout = scp->request->timeout / HZ; + pthru->timeout = cpu_to_le16(scp->request->timeout / HZ); } /* * Construct SGL */ if (instance->flag_ieee == 1) { - pthru->flags |= MFI_FRAME_SGL64; + pthru->flags |= cpu_to_le16(MFI_FRAME_SGL64); pthru->sge_count = megasas_make_sgl_skinny(instance, scp, &pthru->sgl); } else if (IS_DMA64) { - pthru->flags |= MFI_FRAME_SGL64; + pthru->flags |= cpu_to_le16(MFI_FRAME_SGL64); pthru->sge_count = 
megasas_make_sgl64(instance, scp, &pthru->sgl); } else @@ -1196,8 +1200,10 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp, * Sense info specific */ pthru->sense_len = SCSI_SENSE_BUFFERSIZE; - pthru->sense_buf_phys_addr_hi = 0; - pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + pthru->sense_buf_phys_addr_hi = + cpu_to_le32(upper_32_bits(cmd->sense_phys_addr)); + pthru->sense_buf_phys_addr_lo = + cpu_to_le32(lower_32_bits(cmd->sense_phys_addr)); /* * Compute the total number of frames this command consumes. FW uses @@ -1248,7 +1254,7 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, ldio->timeout = 0; ldio->reserved_0 = 0; ldio->pad_0 = 0; - ldio->flags = flags; + ldio->flags = cpu_to_le16(flags); ldio->start_lba_hi = 0; ldio->access_byte = (scp->cmd_len != 6) ? scp->cmnd[1] : 0; @@ -1256,52 +1262,59 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, * 6-byte READ(0x08) or WRITE(0x0A) cdb */ if (scp->cmd_len == 6) { - ldio->lba_count = (u32) scp->cmnd[4]; - ldio->start_lba_lo = ((u32) scp->cmnd[1] << 16) | - ((u32) scp->cmnd[2] << 8) | (u32) scp->cmnd[3]; + ldio->lba_count = cpu_to_le32((u32) scp->cmnd[4]); + ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[1] << 16) | + ((u32) scp->cmnd[2] << 8) | + (u32) scp->cmnd[3]); - ldio->start_lba_lo &= 0x1FFFFF; + ldio->start_lba_lo &= cpu_to_le32(0x1FFFFF); } /* * 10-byte READ(0x28) or WRITE(0x2A) cdb */ else if (scp->cmd_len == 10) { - ldio->lba_count = (u32) scp->cmnd[8] | - ((u32) scp->cmnd[7] << 8); - ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | - ((u32) scp->cmnd[3] << 16) | - ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + ldio->lba_count = cpu_to_le32((u32) scp->cmnd[8] | + ((u32) scp->cmnd[7] << 8)); + ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | + (u32) scp->cmnd[5]); } /* * 12-byte READ(0xA8) or WRITE(0xAA) cdb */ else if 
(scp->cmd_len == 12) { - ldio->lba_count = ((u32) scp->cmnd[6] << 24) | - ((u32) scp->cmnd[7] << 16) | - ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + ldio->lba_count = cpu_to_le32(((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | + (u32) scp->cmnd[9]); - ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) | - ((u32) scp->cmnd[3] << 16) | - ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | + (u32) scp->cmnd[5]); } /* * 16-byte READ(0x88) or WRITE(0x8A) cdb */ else if (scp->cmd_len == 16) { - ldio->lba_count = ((u32) scp->cmnd[10] << 24) | - ((u32) scp->cmnd[11] << 16) | - ((u32) scp->cmnd[12] << 8) | (u32) scp->cmnd[13]; + ldio->lba_count = cpu_to_le32(((u32) scp->cmnd[10] << 24) | + ((u32) scp->cmnd[11] << 16) | + ((u32) scp->cmnd[12] << 8) | + (u32) scp->cmnd[13]); - ldio->start_lba_lo = ((u32) scp->cmnd[6] << 24) | - ((u32) scp->cmnd[7] << 16) | - ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9]; + ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[6] << 24) | + ((u32) scp->cmnd[7] << 16) | + ((u32) scp->cmnd[8] << 8) | + (u32) scp->cmnd[9]); - ldio->start_lba_hi = ((u32) scp->cmnd[2] << 24) | - ((u32) scp->cmnd[3] << 16) | - ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5]; + ldio->start_lba_hi = cpu_to_le32(((u32) scp->cmnd[2] << 24) | + ((u32) scp->cmnd[3] << 16) | + ((u32) scp->cmnd[4] << 8) | + (u32) scp->cmnd[5]); } @@ -1309,11 +1322,11 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, * Construct SGL */ if (instance->flag_ieee) { - ldio->flags |= MFI_FRAME_SGL64; + ldio->flags |= cpu_to_le16(MFI_FRAME_SGL64); ldio->sge_count = megasas_make_sgl_skinny(instance, scp, &ldio->sgl); } else if (IS_DMA64) { - ldio->flags |= MFI_FRAME_SGL64; + ldio->flags |= cpu_to_le16(MFI_FRAME_SGL64); ldio->sge_count = megasas_make_sgl64(instance, scp, &ldio->sgl); } else ldio->sge_count = 
megasas_make_sgl32(instance, scp, &ldio->sgl); @@ -1329,7 +1342,7 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp, */ ldio->sense_len = SCSI_SENSE_BUFFERSIZE; ldio->sense_buf_phys_addr_hi = 0; - ldio->sense_buf_phys_addr_lo = cmd->sense_phys_addr; + ldio->sense_buf_phys_addr_lo = cpu_to_le32(cmd->sense_phys_addr); /* * Compute the total number of frames this command consumes. FW uses @@ -1400,20 +1413,32 @@ megasas_dump_pending_frames(struct megasas_instance *instance) ldio = (struct megasas_io_frame *)cmd->frame; mfi_sgl = &ldio->sgl; sgcount = ldio->sge_count; - printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, lba lo : 0x%x, lba_hi : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",instance->host->host_no, cmd->frame_count,ldio->cmd,ldio->target_id, ldio->start_lba_lo,ldio->start_lba_hi,ldio->sense_buf_phys_addr_lo,sgcount); + printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x," + " lba lo : 0x%x, lba_hi : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n", + instance->host->host_no, cmd->frame_count, ldio->cmd, ldio->target_id, + le32_to_cpu(ldio->start_lba_lo), le32_to_cpu(ldio->start_lba_hi), + le32_to_cpu(ldio->sense_buf_phys_addr_lo), sgcount); } else { pthru = (struct megasas_pthru_frame *) cmd->frame; mfi_sgl = &pthru->sgl; sgcount = pthru->sge_count; - printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, lun : 0x%x, cdb_len : 0x%x, data xfer len : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",instance->host->host_no,cmd->frame_count,pthru->cmd,pthru->target_id,pthru->lun,pthru->cdb_len , pthru->data_xfer_len,pthru->sense_buf_phys_addr_lo,sgcount); + printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, " + "lun : 0x%x, cdb_len : 0x%x, data xfer len : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n", + instance->host->host_no, cmd->frame_count, pthru->cmd, pthru->target_id, + pthru->lun, pthru->cdb_len, 
le32_to_cpu(pthru->data_xfer_len), + le32_to_cpu(pthru->sense_buf_phys_addr_lo), sgcount); } if(megasas_dbg_lvl & MEGASAS_DBG_LVL){ for (n = 0; n < sgcount; n++){ if (IS_DMA64) - printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%08lx ",mfi_sgl->sge64[n].length , (unsigned long)mfi_sgl->sge64[n].phys_addr) ; + printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%llx ", + le32_to_cpu(mfi_sgl->sge64[n].length), + le64_to_cpu(mfi_sgl->sge64[n].phys_addr)); else - printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%x ",mfi_sgl->sge32[n].length , mfi_sgl->sge32[n].phys_addr) ; + printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%x ", + le32_to_cpu(mfi_sgl->sge32[n].length), + le32_to_cpu(mfi_sgl->sge32[n].phys_addr)); } } printk(KERN_ERR "\n"); @@ -1674,11 +1699,11 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) spin_lock_irqsave(&instance->completion_lock, flags); - producer = *instance->producer; - consumer = *instance->consumer; + producer = le32_to_cpu(*instance->producer); + consumer = le32_to_cpu(*instance->consumer); while (consumer != producer) { - context = instance->reply_queue[consumer]; + context = le32_to_cpu(instance->reply_queue[consumer]); if (context >= instance->max_fw_cmds) { printk(KERN_ERR "Unexpected context value %x\n", context); @@ -1695,7 +1720,7 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr) } } - *instance->consumer = producer; + *instance->consumer = cpu_to_le32(producer); spin_unlock_irqrestore(&instance->completion_lock, flags); @@ -1716,7 +1741,7 @@ void megasas_do_ocr(struct megasas_instance *instance) if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) || (instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) || (instance->pdev->device == PCI_DEVICE_ID_LSI_VERDE_ZCR)) { - *instance->consumer = MEGASAS_ADPRESET_INPROG_SIGN; + *instance->consumer = cpu_to_le32(MEGASAS_ADPRESET_INPROG_SIGN); } instance->instancet->disable_intr(instance); instance->adprecovery = 
MEGASAS_ADPRESET_SM_INFAULT; @@ -2186,6 +2211,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, struct megasas_header *hdr = &cmd->frame->hdr; unsigned long flags; struct fusion_context *fusion = instance->ctrl_context; + u32 opcode; /* flag for the retry reset */ cmd->retry_for_fw_reset = 0; @@ -2287,9 +2313,10 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, case MFI_CMD_SMP: case MFI_CMD_STP: case MFI_CMD_DCMD: + opcode = le32_to_cpu(cmd->frame->dcmd.opcode); /* Check for LD map update */ - if ((cmd->frame->dcmd.opcode == MR_DCMD_LD_MAP_GET_INFO) && - (cmd->frame->dcmd.mbox.b[1] == 1)) { + if ((opcode == MR_DCMD_LD_MAP_GET_INFO) + && (cmd->frame->dcmd.mbox.b[1] == 1)) { fusion->fast_path_io = 0; spin_lock_irqsave(instance->host->host_lock, flags); if (cmd->frame->hdr.cmd_status != 0) { @@ -2323,8 +2350,8 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, flags); break; } - if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET_INFO || - cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET) { + if (opcode == MR_DCMD_CTRL_EVENT_GET_INFO || + opcode == MR_DCMD_CTRL_EVENT_GET) { spin_lock_irqsave(&poll_aen_lock, flags); megasas_poll_wait_aen = 0; spin_unlock_irqrestore(&poll_aen_lock, flags); @@ -2333,7 +2360,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, /* * See if got an event notification */ - if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT) + if (opcode == MR_DCMD_CTRL_EVENT_WAIT) megasas_service_aen(instance, cmd); else megasas_complete_int_cmd(instance, cmd); @@ -2606,7 +2633,7 @@ megasas_deplete_reply_queue(struct megasas_instance *instance, PCI_DEVICE_ID_LSI_VERDE_ZCR)) { *instance->consumer = - MEGASAS_ADPRESET_INPROG_SIGN; + cpu_to_le32(MEGASAS_ADPRESET_INPROG_SIGN); } @@ -2983,7 +3010,7 @@ static int megasas_create_frame_pool(struct megasas_instance *instance) } memset(cmd->frame, 0, total_sz); - 
cmd->frame->io.context = cmd->index; + cmd->frame->io.context = cpu_to_le32(cmd->index); cmd->frame->io.pad_0 = 0; if ((instance->pdev->device != PCI_DEVICE_ID_LSI_FUSION) && (instance->pdev->device != PCI_DEVICE_ID_LSI_INVADER) && @@ -3143,13 +3170,13 @@ megasas_get_pd_list(struct megasas_instance *instance) dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; dcmd->pad_0 = 0; - dcmd->data_xfer_len = MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST); - dcmd->opcode = MR_DCMD_PD_LIST_QUERY; - dcmd->sgl.sge32[0].phys_addr = ci_h; - dcmd->sgl.sge32[0].length = MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST); + dcmd->data_xfer_len = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST)); + dcmd->opcode = cpu_to_le32(MR_DCMD_PD_LIST_QUERY); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST)); if (!megasas_issue_polled(instance, cmd)) { ret = 0; @@ -3164,16 +3191,16 @@ megasas_get_pd_list(struct megasas_instance *instance) pd_addr = ci->addr; if ( ret == 0 && - (ci->count < + (le32_to_cpu(ci->count) < (MEGASAS_MAX_PD_CHANNELS * MEGASAS_MAX_DEV_PER_CHANNEL))) { memset(instance->pd_list, 0, MEGASAS_MAX_PD * sizeof(struct megasas_pd_list)); - for (pd_index = 0; pd_index < ci->count; pd_index++) { + for (pd_index = 0; pd_index < le32_to_cpu(ci->count); pd_index++) { instance->pd_list[pd_addr->deviceId].tid = - pd_addr->deviceId; + le16_to_cpu(pd_addr->deviceId); instance->pd_list[pd_addr->deviceId].driveType = pd_addr->scsiDevType; instance->pd_list[pd_addr->deviceId].driveState = @@ -3207,6 +3234,7 @@ megasas_get_ld_list(struct megasas_instance *instance) struct megasas_dcmd_frame *dcmd; struct MR_LD_LIST *ci; dma_addr_t ci_h = 0; + u32 ld_count; cmd = megasas_get_cmd(instance); @@ -3233,12 +3261,12 @@ megasas_get_ld_list(struct megasas_instance *instance) dcmd->cmd = 
MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; - dcmd->data_xfer_len = sizeof(struct MR_LD_LIST); - dcmd->opcode = MR_DCMD_LD_GET_LIST; - dcmd->sgl.sge32[0].phys_addr = ci_h; - dcmd->sgl.sge32[0].length = sizeof(struct MR_LD_LIST); + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_LIST)); + dcmd->opcode = cpu_to_le32(MR_DCMD_LD_GET_LIST); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_LIST)); dcmd->pad_0 = 0; if (!megasas_issue_polled(instance, cmd)) { @@ -3247,12 +3275,14 @@ megasas_get_ld_list(struct megasas_instance *instance) ret = -1; } + ld_count = le32_to_cpu(ci->ldCount); + /* the following function will get the instance PD LIST */ - if ((ret == 0) && (ci->ldCount <= MAX_LOGICAL_DRIVES)) { + if ((ret == 0) && (ld_count <= MAX_LOGICAL_DRIVES)) { memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); - for (ld_index = 0; ld_index < ci->ldCount; ld_index++) { + for (ld_index = 0; ld_index < ld_count; ld_index++) { if (ci->ldList[ld_index].state != 0) { ids = ci->ldList[ld_index].ref.targetId; instance->ld_ids[ids] = @@ -3271,6 +3301,87 @@ megasas_get_ld_list(struct megasas_instance *instance) } /** + * megasas_ld_list_query - Returns FW's ld_list structure + * @instance: Adapter soft state + * @ld_list: ld_list structure + * + * Issues an internal command (DCMD) to get the FW's controller PD + * list structure. This information is mainly used to find out SYSTEM + * supported by the FW. 
+ */ +static int +megasas_ld_list_query(struct megasas_instance *instance, u8 query_type) +{ + int ret = 0, ld_index = 0, ids = 0; + struct megasas_cmd *cmd; + struct megasas_dcmd_frame *dcmd; + struct MR_LD_TARGETID_LIST *ci; + dma_addr_t ci_h = 0; + u32 tgtid_count; + + cmd = megasas_get_cmd(instance); + + if (!cmd) { + printk(KERN_WARNING + "megasas:(megasas_ld_list_query): Failed to get cmd\n"); + return -ENOMEM; + } + + dcmd = &cmd->frame->dcmd; + + ci = pci_alloc_consistent(instance->pdev, + sizeof(struct MR_LD_TARGETID_LIST), &ci_h); + + if (!ci) { + printk(KERN_WARNING + "megasas: Failed to alloc mem for ld_list_query\n"); + megasas_return_cmd(instance, cmd); + return -ENOMEM; + } + + memset(ci, 0, sizeof(*ci)); + memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE); + + dcmd->mbox.b[0] = query_type; + + dcmd->cmd = MFI_CMD_DCMD; + dcmd->cmd_status = 0xFF; + dcmd->sge_count = 1; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); + dcmd->timeout = 0; + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST)); + dcmd->opcode = cpu_to_le32(MR_DCMD_LD_LIST_QUERY); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST)); + dcmd->pad_0 = 0; + + if (!megasas_issue_polled(instance, cmd) && !dcmd->cmd_status) { + ret = 0; + } else { + /* On failure, call older LD list DCMD */ + ret = 1; + } + + tgtid_count = le32_to_cpu(ci->count); + + if ((ret == 0) && (tgtid_count <= (MAX_LOGICAL_DRIVES))) { + memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); + for (ld_index = 0; ld_index < tgtid_count; ld_index++) { + ids = ci->targetId[ld_index]; + instance->ld_ids[ids] = ci->targetId[ld_index]; + } + + } + + pci_free_consistent(instance->pdev, sizeof(struct MR_LD_TARGETID_LIST), + ci, ci_h); + + megasas_return_cmd(instance, cmd); + + return ret; +} + +/** * megasas_get_controller_info - Returns FW's controller structure * @instance: Adapter soft state * @ctrl_info: Controller information structure @@ 
-3313,13 +3424,13 @@ megasas_get_ctrl_info(struct megasas_instance *instance, dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; dcmd->pad_0 = 0; - dcmd->data_xfer_len = sizeof(struct megasas_ctrl_info); - dcmd->opcode = MR_DCMD_CTRL_GET_INFO; - dcmd->sgl.sge32[0].phys_addr = ci_h; - dcmd->sgl.sge32[0].length = sizeof(struct megasas_ctrl_info); + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_ctrl_info)); + dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_GET_INFO); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_ctrl_info)); if (!megasas_issue_polled(instance, cmd)) { ret = 0; @@ -3375,17 +3486,20 @@ megasas_issue_init_mfi(struct megasas_instance *instance) memset(initq_info, 0, sizeof(struct megasas_init_queue_info)); init_frame->context = context; - initq_info->reply_queue_entries = instance->max_fw_cmds + 1; - initq_info->reply_queue_start_phys_addr_lo = instance->reply_queue_h; + initq_info->reply_queue_entries = cpu_to_le32(instance->max_fw_cmds + 1); + initq_info->reply_queue_start_phys_addr_lo = cpu_to_le32(instance->reply_queue_h); - initq_info->producer_index_phys_addr_lo = instance->producer_h; - initq_info->consumer_index_phys_addr_lo = instance->consumer_h; + initq_info->producer_index_phys_addr_lo = cpu_to_le32(instance->producer_h); + initq_info->consumer_index_phys_addr_lo = cpu_to_le32(instance->consumer_h); init_frame->cmd = MFI_CMD_INIT; init_frame->cmd_status = 0xFF; - init_frame->queue_info_new_phys_addr_lo = initq_info_h; + init_frame->queue_info_new_phys_addr_lo = + cpu_to_le32(lower_32_bits(initq_info_h)); + init_frame->queue_info_new_phys_addr_hi = + cpu_to_le32(upper_32_bits(initq_info_h)); - init_frame->data_xfer_len = sizeof(struct megasas_init_queue_info); + init_frame->data_xfer_len = cpu_to_le32(sizeof(struct megasas_init_queue_info)); /* * 
disable the intr before firing the init frame to FW @@ -3648,7 +3762,9 @@ static int megasas_init_fw(struct megasas_instance *instance) megasas_get_pd_list(instance); memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS); - megasas_get_ld_list(instance); + if (megasas_ld_list_query(instance, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST)) + megasas_get_ld_list(instance); ctrl_info = kmalloc(sizeof(struct megasas_ctrl_info), GFP_KERNEL); @@ -3665,8 +3781,8 @@ static int megasas_init_fw(struct megasas_instance *instance) if (ctrl_info && !megasas_get_ctrl_info(instance, ctrl_info)) { max_sectors_1 = (1 << ctrl_info->stripe_sz_ops.min) * - ctrl_info->max_strips_per_io; - max_sectors_2 = ctrl_info->max_request_size; + le16_to_cpu(ctrl_info->max_strips_per_io); + max_sectors_2 = le32_to_cpu(ctrl_info->max_request_size); tmp_sectors = min_t(u32, max_sectors_1 , max_sectors_2); @@ -3675,14 +3791,18 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->is_imr = 0; dev_info(&instance->pdev->dev, "Controller type: MR," "Memory size is: %dMB\n", - ctrl_info->memory_size); + le16_to_cpu(ctrl_info->memory_size)); } else { instance->is_imr = 1; dev_info(&instance->pdev->dev, "Controller type: iMR\n"); } + /* OnOffProperties are converted into CPU arch*/ + le32_to_cpus((u32 *)&ctrl_info->properties.OnOffProperties); instance->disableOnlineCtrlReset = ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset; + /* adapterOperations2 are converted into CPU arch*/ + le32_to_cpus((u32 *)&ctrl_info->adapterOperations2); instance->UnevenSpanSupport = ctrl_info->adapterOperations2.supportUnevenSpans; if (instance->UnevenSpanSupport) { @@ -3696,7 +3816,6 @@ static int megasas_init_fw(struct megasas_instance *instance) } } - instance->max_sectors_per_req = instance->max_num_sge * PAGE_SIZE / 512; if (tmp_sectors && (instance->max_sectors_per_req > tmp_sectors)) @@ -3802,20 +3921,24 @@ megasas_get_seq_num(struct megasas_instance *instance, dcmd->cmd = MFI_CMD_DCMD; 
dcmd->cmd_status = 0x0; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; dcmd->pad_0 = 0; - dcmd->data_xfer_len = sizeof(struct megasas_evt_log_info); - dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO; - dcmd->sgl.sge32[0].phys_addr = el_info_h; - dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_log_info); + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_log_info)); + dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_EVENT_GET_INFO); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(el_info_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_log_info)); megasas_issue_blocked_cmd(instance, cmd); /* * Copy the data back into callers buffer */ - memcpy(eli, el_info, sizeof(struct megasas_evt_log_info)); + eli->newest_seq_num = le32_to_cpu(el_info->newest_seq_num); + eli->oldest_seq_num = le32_to_cpu(el_info->oldest_seq_num); + eli->clear_seq_num = le32_to_cpu(el_info->clear_seq_num); + eli->shutdown_seq_num = le32_to_cpu(el_info->shutdown_seq_num); + eli->boot_seq_num = le32_to_cpu(el_info->boot_seq_num); pci_free_consistent(instance->pdev, sizeof(struct megasas_evt_log_info), el_info, el_info_h); @@ -3862,6 +3985,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, if (instance->aen_cmd) { prev_aen.word = instance->aen_cmd->frame->dcmd.mbox.w[1]; + prev_aen.members.locale = le16_to_cpu(prev_aen.members.locale); /* * A class whose enum value is smaller is inclusive of all @@ -3874,7 +3998,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, * values */ if ((prev_aen.members.class <= curr_aen.members.class) && - !((prev_aen.members.locale & curr_aen.members.locale) ^ + !((le16_to_cpu(prev_aen.members.locale) & curr_aen.members.locale) ^ curr_aen.members.locale)) { /* * Previously issued event registration includes @@ -3882,7 +4006,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, */ return 0; } else { - 
curr_aen.members.locale |= prev_aen.members.locale; + curr_aen.members.locale |= le16_to_cpu(prev_aen.members.locale); if (prev_aen.members.class < curr_aen.members.class) curr_aen.members.class = prev_aen.members.class; @@ -3917,16 +4041,16 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num, dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; dcmd->pad_0 = 0; + dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_detail)); + dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_EVENT_WAIT); + dcmd->mbox.w[0] = cpu_to_le32(seq_num); instance->last_seq_num = seq_num; - dcmd->data_xfer_len = sizeof(struct megasas_evt_detail); - dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT; - dcmd->mbox.w[0] = seq_num; - dcmd->mbox.w[1] = curr_aen.word; - dcmd->sgl.sge32[0].phys_addr = (u32) instance->evt_detail_h; - dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_detail); + dcmd->mbox.w[1] = cpu_to_le32(curr_aen.word); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->evt_detail_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_detail)); if (instance->aen_cmd != NULL) { megasas_return_cmd(instance, cmd); @@ -3972,8 +4096,9 @@ static int megasas_start_aen(struct megasas_instance *instance) class_locale.members.locale = MR_EVT_LOCALE_ALL; class_locale.members.class = MR_EVT_CLASS_DEBUG; - return megasas_register_aen(instance, eli.newest_seq_num + 1, - class_locale.word); + return megasas_register_aen(instance, + le32_to_cpu(eli.newest_seq_num) + 1, + class_locale.word); } /** @@ -4068,6 +4193,7 @@ megasas_set_dma_mask(struct pci_dev *pdev) if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) goto fail_set_dma_mask; } + return 0; fail_set_dma_mask: @@ -4386,11 +4512,11 @@ static void megasas_flush_cache(struct megasas_instance *instance) dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 0; - dcmd->flags = 
MFI_FRAME_DIR_NONE; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE); dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = 0; - dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH; + dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_CACHE_FLUSH); dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE; megasas_issue_blocked_cmd(instance, cmd); @@ -4431,11 +4557,11 @@ static void megasas_shutdown_controller(struct megasas_instance *instance, dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0x0; dcmd->sge_count = 0; - dcmd->flags = MFI_FRAME_DIR_NONE; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE); dcmd->timeout = 0; dcmd->pad_0 = 0; dcmd->data_xfer_len = 0; - dcmd->opcode = opcode; + dcmd->opcode = cpu_to_le32(opcode); megasas_issue_blocked_cmd(instance, cmd); @@ -4850,10 +4976,11 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, * alone separately */ memcpy(cmd->frame, ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE); - cmd->frame->hdr.context = cmd->index; + cmd->frame->hdr.context = cpu_to_le32(cmd->index); cmd->frame->hdr.pad_0 = 0; - cmd->frame->hdr.flags &= ~(MFI_FRAME_IEEE | MFI_FRAME_SGL64 | - MFI_FRAME_SENSE64); + cmd->frame->hdr.flags &= cpu_to_le16(~(MFI_FRAME_IEEE | + MFI_FRAME_SGL64 | + MFI_FRAME_SENSE64)); /* * The management interface between applications and the fw uses @@ -4887,8 +5014,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, * We don't change the dma_coherent_mask, so * pci_alloc_consistent only returns 32bit addresses */ - kern_sge32[i].phys_addr = (u32) buf_handle; - kern_sge32[i].length = ioc->sgl[i].iov_len; + kern_sge32[i].phys_addr = cpu_to_le32(buf_handle); + kern_sge32[i].length = cpu_to_le32(ioc->sgl[i].iov_len); /* * We created a kernel buffer corresponding to the @@ -4911,7 +5038,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, sense_ptr = (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off); - *sense_ptr = sense_handle; + *sense_ptr = cpu_to_le32(sense_handle); } /* @@ -4971,9 +5098,9 @@ 
megasas_mgmt_fw_ioctl(struct megasas_instance *instance, for (i = 0; i < ioc->sge_count; i++) { if (kbuff_arr[i]) dma_free_coherent(&instance->pdev->dev, - kern_sge32[i].length, + le32_to_cpu(kern_sge32[i].length), kbuff_arr[i], - kern_sge32[i].phys_addr); + le32_to_cpu(kern_sge32[i].phys_addr)); } megasas_return_cmd(instance, cmd); @@ -5327,7 +5454,7 @@ megasas_aen_polling(struct work_struct *work) host = instance->host; if (instance->evt_detail) { - switch (instance->evt_detail->code) { + switch (le32_to_cpu(instance->evt_detail->code)) { case MR_EVT_PD_INSERTED: if (megasas_get_pd_list(instance) == 0) { for (i = 0; i < MEGASAS_MAX_PD_CHANNELS; i++) { @@ -5389,7 +5516,9 @@ megasas_aen_polling(struct work_struct *work) case MR_EVT_LD_OFFLINE: case MR_EVT_CFG_CLEARED: case MR_EVT_LD_DELETED: - megasas_get_ld_list(instance); + if (megasas_ld_list_query(instance, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST)) + megasas_get_ld_list(instance); for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) { for (j = 0; j < MEGASAS_MAX_DEV_PER_CHANNEL; @@ -5399,7 +5528,7 @@ megasas_aen_polling(struct work_struct *work) (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j; sdev1 = scsi_device_lookup(host, - i + MEGASAS_MAX_LD_CHANNELS, + MEGASAS_MAX_PD_CHANNELS + i, j, 0); @@ -5418,7 +5547,9 @@ megasas_aen_polling(struct work_struct *work) doscan = 0; break; case MR_EVT_LD_CREATED: - megasas_get_ld_list(instance); + if (megasas_ld_list_query(instance, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST)) + megasas_get_ld_list(instance); for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) { for (j = 0; j < MEGASAS_MAX_DEV_PER_CHANNEL; @@ -5427,14 +5558,14 @@ megasas_aen_polling(struct work_struct *work) (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j; sdev1 = scsi_device_lookup(host, - i+MEGASAS_MAX_LD_CHANNELS, + MEGASAS_MAX_PD_CHANNELS + i, j, 0); if (instance->ld_ids[ld_index] != 0xff) { if (!sdev1) { scsi_add_device(host, - i + 2, + MEGASAS_MAX_PD_CHANNELS + i, j, 0); } } @@ -5483,18 +5614,20 @@ megasas_aen_polling(struct work_struct *work) 
} } - megasas_get_ld_list(instance); + if (megasas_ld_list_query(instance, + MR_LD_QUERY_TYPE_EXPOSED_TO_HOST)) + megasas_get_ld_list(instance); for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) { for (j = 0; j < MEGASAS_MAX_DEV_PER_CHANNEL; j++) { ld_index = (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j; sdev1 = scsi_device_lookup(host, - i+MEGASAS_MAX_LD_CHANNELS, j, 0); + MEGASAS_MAX_PD_CHANNELS + i, j, 0); if (instance->ld_ids[ld_index] != 0xff) { if (!sdev1) { scsi_add_device(host, - i+2, + MEGASAS_MAX_PD_CHANNELS + i, j, 0); } else { scsi_device_put(sdev1); @@ -5514,7 +5647,7 @@ megasas_aen_polling(struct work_struct *work) return ; } - seq_num = instance->evt_detail->seq_num + 1; + seq_num = le32_to_cpu(instance->evt_detail->seq_num) + 1; /* Register AEN with FW for latest sequence number plus 1 */ class_locale.members.reserved = 0; diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 4f401f753f8e..e24b6eb645b5 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -126,17 +126,17 @@ static u8 MR_LdDataArmGet(u32 ld, u32 armIdx, struct MR_FW_RAID_MAP_ALL *map) return map->raidMap.ldSpanMap[ld].dataArmMap[armIdx]; } -static u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map) +u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map) { - return map->raidMap.arMapInfo[ar].pd[arm]; + return le16_to_cpu(map->raidMap.arMapInfo[ar].pd[arm]); } -static u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map) +u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map) { - return map->raidMap.ldSpanMap[ld].spanBlock[span].span.arrayRef; + return le16_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].span.arrayRef); } -static u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map) +u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map) { return map->raidMap.devHndlInfo[pd].curDevHdl; } @@ -148,7 +148,7 @@ u16 MR_GetLDTgtId(u32 
ld, struct MR_FW_RAID_MAP_ALL *map) u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map) { - return map->raidMap.ldTgtIdToLd[ldTgtId]; + return le16_to_cpu(map->raidMap.ldTgtIdToLd[ldTgtId]); } static struct MR_LD_SPAN *MR_LdSpanPtrGet(u32 ld, u32 span, @@ -167,18 +167,22 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance) struct LD_LOAD_BALANCE_INFO *lbInfo = fusion->load_balance_info; PLD_SPAN_INFO ldSpanInfo = fusion->log_to_span; struct MR_FW_RAID_MAP *pFwRaidMap = &map->raidMap; + struct MR_LD_RAID *raid; + int ldCount, num_lds; + u16 ld; + - if (pFwRaidMap->totalSize != + if (le32_to_cpu(pFwRaidMap->totalSize) != (sizeof(struct MR_FW_RAID_MAP) -sizeof(struct MR_LD_SPAN_MAP) + - (sizeof(struct MR_LD_SPAN_MAP) *pFwRaidMap->ldCount))) { + (sizeof(struct MR_LD_SPAN_MAP) * le32_to_cpu(pFwRaidMap->ldCount)))) { printk(KERN_ERR "megasas: map info structure size 0x%x is not matching with ld count\n", (unsigned int)((sizeof(struct MR_FW_RAID_MAP) - sizeof(struct MR_LD_SPAN_MAP)) + (sizeof(struct MR_LD_SPAN_MAP) * - pFwRaidMap->ldCount))); + le32_to_cpu(pFwRaidMap->ldCount)))); printk(KERN_ERR "megasas: span map %x, pFwRaidMap->totalSize " ": %x\n", (unsigned int)sizeof(struct MR_LD_SPAN_MAP), - pFwRaidMap->totalSize); + le32_to_cpu(pFwRaidMap->totalSize)); return 0; } @@ -187,6 +191,15 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance) mr_update_load_balance_params(map, lbInfo); + num_lds = le32_to_cpu(map->raidMap.ldCount); + + /*Convert Raid capability values to CPU arch */ + for (ldCount = 0; ldCount < num_lds; ldCount++) { + ld = MR_TargetIdToLdGet(ldCount, map); + raid = MR_LdRaidGet(ld, map); + le32_to_cpus((u32 *)&raid->capability); + } + return 1; } @@ -200,23 +213,20 @@ u32 MR_GetSpanBlock(u32 ld, u64 row, u64 *span_blk, for (span = 0; span < raid->spanDepth; span++, pSpanBlock++) { - for (j = 0; j < pSpanBlock->block_span_info.noElements; j++) { + for (j = 0; j < le32_to_cpu(pSpanBlock->block_span_info.noElements); j++) { quad 
= &pSpanBlock->block_span_info.quad[j]; - if (quad->diff == 0) + if (le32_to_cpu(quad->diff) == 0) return SPAN_INVALID; - if (quad->logStart <= row && row <= quad->logEnd && - (mega_mod64(row-quad->logStart, quad->diff)) == 0) { + if (le64_to_cpu(quad->logStart) <= row && row <= + le64_to_cpu(quad->logEnd) && (mega_mod64(row - le64_to_cpu(quad->logStart), + le32_to_cpu(quad->diff))) == 0) { if (span_blk != NULL) { u64 blk, debugBlk; - blk = - mega_div64_32( - (row-quad->logStart), - quad->diff); + blk = mega_div64_32((row-le64_to_cpu(quad->logStart)), le32_to_cpu(quad->diff)); debugBlk = blk; - blk = (blk + quad->offsetInSpan) << - raid->stripeShift; + blk = (blk + le64_to_cpu(quad->offsetInSpan)) << raid->stripeShift; *span_blk = blk; } return span; @@ -257,8 +267,8 @@ static int getSpanInfo(struct MR_FW_RAID_MAP_ALL *map, PLD_SPAN_INFO ldSpanInfo) for (span = 0; span < raid->spanDepth; span++) dev_dbg(&instance->pdev->dev, "Span=%x," " number of quads=%x\n", span, - map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements); + le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements)); for (element = 0; element < MAX_QUAD_DEPTH; element++) { span_set = &(ldSpanInfo[ld].span_set[element]); if (span_set->span_row_data_width == 0) @@ -286,22 +296,22 @@ static int getSpanInfo(struct MR_FW_RAID_MAP_ALL *map, PLD_SPAN_INFO ldSpanInfo) (long unsigned int)span_set->data_strip_end); for (span = 0; span < raid->spanDepth; span++) { - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements >= + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements) >= element + 1) { quad = &map->raidMap.ldSpanMap[ld]. spanBlock[span].block_span_info. 
quad[element]; dev_dbg(&instance->pdev->dev, "Span=%x," "Quad=%x, diff=%x\n", span, - element, quad->diff); + element, le32_to_cpu(quad->diff)); dev_dbg(&instance->pdev->dev, "offset_in_span=0x%08lx\n", - (long unsigned int)quad->offsetInSpan); + (long unsigned int)le64_to_cpu(quad->offsetInSpan)); dev_dbg(&instance->pdev->dev, "logical start=0x%08lx, end=0x%08lx\n", - (long unsigned int)quad->logStart, - (long unsigned int)quad->logEnd); + (long unsigned int)le64_to_cpu(quad->logStart), + (long unsigned int)le64_to_cpu(quad->logEnd)); } } } @@ -348,23 +358,23 @@ u32 mr_spanset_get_span_block(struct megasas_instance *instance, continue; for (span = 0; span < raid->spanDepth; span++) - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements >= info+1) { + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements) >= info+1) { quad = &map->raidMap.ldSpanMap[ld]. spanBlock[span]. block_span_info.quad[info]; - if (quad->diff == 0) + if (le32_to_cpu(quad->diff == 0)) return SPAN_INVALID; - if (quad->logStart <= row && - row <= quad->logEnd && - (mega_mod64(row - quad->logStart, - quad->diff)) == 0) { + if (le64_to_cpu(quad->logStart) <= row && + row <= le64_to_cpu(quad->logEnd) && + (mega_mod64(row - le64_to_cpu(quad->logStart), + le32_to_cpu(quad->diff))) == 0) { if (span_blk != NULL) { u64 blk; blk = mega_div64_32 - ((row - quad->logStart), - quad->diff); - blk = (blk + quad->offsetInSpan) + ((row - le64_to_cpu(quad->logStart)), + le32_to_cpu(quad->diff)); + blk = (blk + le64_to_cpu(quad->offsetInSpan)) << raid->stripeShift; *span_blk = blk; } @@ -415,8 +425,8 @@ static u64 get_row_from_strip(struct megasas_instance *instance, span_set_Row = mega_div64_32(span_set_Strip, span_set->span_row_data_width) * span_set->diff; for (span = 0, span_offset = 0; span < raid->spanDepth; span++) - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. 
- block_span_info.noElements >= info+1) { + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements >= info+1)) { if (strip_offset >= span_set->strip_offset[span]) span_offset++; @@ -480,18 +490,18 @@ static u64 get_strip_from_row(struct megasas_instance *instance, continue; for (span = 0; span < raid->spanDepth; span++) - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements >= info+1) { + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements) >= info+1) { quad = &map->raidMap.ldSpanMap[ld]. spanBlock[span].block_span_info.quad[info]; - if (quad->logStart <= row && - row <= quad->logEnd && - mega_mod64((row - quad->logStart), - quad->diff) == 0) { + if (le64_to_cpu(quad->logStart) <= row && + row <= le64_to_cpu(quad->logEnd) && + mega_mod64((row - le64_to_cpu(quad->logStart)), + le32_to_cpu(quad->diff)) == 0) { strip = mega_div64_32 (((row - span_set->data_row_start) - - quad->logStart), - quad->diff); + - le64_to_cpu(quad->logStart)), + le32_to_cpu(quad->diff)); strip *= span_set->span_row_data_width; strip += span_set->data_strip_start; strip += span_set->strip_offset[span]; @@ -543,8 +553,8 @@ static u32 get_arm_from_strip(struct megasas_instance *instance, span_set->span_row_data_width); for (span = 0, span_offset = 0; span < raid->spanDepth; span++) - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements >= info+1) { + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. 
+ block_span_info.noElements) >= info+1) { if (strip_offset >= span_set->strip_offset[span]) span_offset = @@ -669,7 +679,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld, } } - *pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk; + *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; return retval; @@ -765,7 +775,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow, } } - *pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk; + *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk); pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) | physArm; return retval; @@ -784,7 +794,7 @@ u8 MR_BuildRaidContext(struct megasas_instance *instance, struct IO_REQUEST_INFO *io_info, struct RAID_CONTEXT *pRAID_Context, - struct MR_FW_RAID_MAP_ALL *map) + struct MR_FW_RAID_MAP_ALL *map, u8 **raidLUN) { struct MR_LD_RAID *raid; u32 ld, stripSize, stripe_mask; @@ -965,7 +975,7 @@ MR_BuildRaidContext(struct megasas_instance *instance, regSize += stripSize; } - pRAID_Context->timeoutValue = map->raidMap.fpPdIoTimeoutSec; + pRAID_Context->timeoutValue = cpu_to_le16(map->raidMap.fpPdIoTimeoutSec); if ((instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) || (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) pRAID_Context->regLockFlags = (isRead) ? @@ -974,9 +984,12 @@ MR_BuildRaidContext(struct megasas_instance *instance, pRAID_Context->regLockFlags = (isRead) ? 
REGION_TYPE_SHARED_READ : raid->regTypeReqOnWrite; pRAID_Context->VirtualDiskTgtId = raid->targetId; - pRAID_Context->regLockRowLBA = regStart; - pRAID_Context->regLockLength = regSize; + pRAID_Context->regLockRowLBA = cpu_to_le64(regStart); + pRAID_Context->regLockLength = cpu_to_le32(regSize); pRAID_Context->configSeqNum = raid->seqNum; + /* save pointer to raid->LUN array */ + *raidLUN = raid->LUN; + /*Get Phy Params only if FP capable, or else leave it to MR firmware to do the calculation.*/ @@ -1047,8 +1060,8 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map, raid = MR_LdRaidGet(ld, map); for (element = 0; element < MAX_QUAD_DEPTH; element++) { for (span = 0; span < raid->spanDepth; span++) { - if (map->raidMap.ldSpanMap[ld].spanBlock[span]. - block_span_info.noElements < + if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span]. + block_span_info.noElements) < element + 1) continue; span_set = &(ldSpanInfo[ld].span_set[element]); @@ -1056,14 +1069,14 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map, spanBlock[span].block_span_info. quad[element]; - span_set->diff = quad->diff; + span_set->diff = le32_to_cpu(quad->diff); for (count = 0, span_row_width = 0; count < raid->spanDepth; count++) { - if (map->raidMap.ldSpanMap[ld]. + if (le32_to_cpu(map->raidMap.ldSpanMap[ld]. spanBlock[count]. block_span_info. 
- noElements >= element + 1) { + noElements) >= element + 1) { span_set->strip_offset[count] = span_row_width; span_row_width += @@ -1077,9 +1090,9 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map, } span_set->span_row_data_width = span_row_width; - span_row = mega_div64_32(((quad->logEnd - - quad->logStart) + quad->diff), - quad->diff); + span_row = mega_div64_32(((le64_to_cpu(quad->logEnd) - + le64_to_cpu(quad->logStart)) + le32_to_cpu(quad->diff)), + le32_to_cpu(quad->diff)); if (element == 0) { span_set->log_start_lba = 0; @@ -1096,7 +1109,7 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map, span_set->data_row_start = 0; span_set->data_row_end = - (span_row * quad->diff) - 1; + (span_row * le32_to_cpu(quad->diff)) - 1; } else { span_set_prev = &(ldSpanInfo[ld]. span_set[element - 1]); @@ -1122,7 +1135,7 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map, span_set_prev->data_row_end + 1; span_set->data_row_end = span_set->data_row_start + - (span_row * quad->diff) - 1; + (span_row * le32_to_cpu(quad->diff)) - 1; } break; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 417d5f167aa2..f6555921fd7a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -72,17 +72,6 @@ megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs); int megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd); - -u8 -MR_BuildRaidContext(struct megasas_instance *instance, - struct IO_REQUEST_INFO *io_info, - struct RAID_CONTEXT *pRAID_Context, - struct MR_FW_RAID_MAP_ALL *map); -u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map); -struct MR_LD_RAID *MR_LdRaidGet(u32 ld, struct MR_FW_RAID_MAP_ALL *map); - -u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map); - void megasas_check_and_restore_queue_depth(struct megasas_instance *instance); @@ -626,23 +615,20 @@ megasas_ioc_init_fusion(struct 
megasas_instance *instance) IOCInitMessage->Function = MPI2_FUNCTION_IOC_INIT; IOCInitMessage->WhoInit = MPI2_WHOINIT_HOST_DRIVER; - IOCInitMessage->MsgVersion = MPI2_VERSION; - IOCInitMessage->HeaderVersion = MPI2_HEADER_VERSION; - IOCInitMessage->SystemRequestFrameSize = - MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4; - - IOCInitMessage->ReplyDescriptorPostQueueDepth = fusion->reply_q_depth; - IOCInitMessage->ReplyDescriptorPostQueueAddress = - fusion->reply_frames_desc_phys; - IOCInitMessage->SystemRequestFrameBaseAddress = - fusion->io_request_frames_phys; + IOCInitMessage->MsgVersion = cpu_to_le16(MPI2_VERSION); + IOCInitMessage->HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION); + IOCInitMessage->SystemRequestFrameSize = cpu_to_le16(MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4); + + IOCInitMessage->ReplyDescriptorPostQueueDepth = cpu_to_le16(fusion->reply_q_depth); + IOCInitMessage->ReplyDescriptorPostQueueAddress = cpu_to_le64(fusion->reply_frames_desc_phys); + IOCInitMessage->SystemRequestFrameBaseAddress = cpu_to_le64(fusion->io_request_frames_phys); IOCInitMessage->HostMSIxVectors = instance->msix_vectors; init_frame = (struct megasas_init_frame *)cmd->frame; memset(init_frame, 0, MEGAMFI_FRAME_SIZE); frame_hdr = &cmd->frame->hdr; frame_hdr->cmd_status = 0xFF; - frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; + frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE); init_frame->cmd = MFI_CMD_INIT; init_frame->cmd_status = 0xFF; @@ -652,17 +638,24 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) init_frame->driver_operations. 
mfi_capabilities.support_additional_msix = 1; + /* driver supports HA / Remote LUN over Fast Path interface */ + init_frame->driver_operations.mfi_capabilities.support_fp_remote_lun + = 1; + /* Convert capability to LE32 */ + cpu_to_le32s((u32 *)&init_frame->driver_operations.mfi_capabilities); - init_frame->queue_info_new_phys_addr_lo = ioc_init_handle; - init_frame->data_xfer_len = sizeof(struct MPI2_IOC_INIT_REQUEST); + init_frame->queue_info_new_phys_addr_lo = cpu_to_le32((u32)ioc_init_handle); + init_frame->data_xfer_len = cpu_to_le32(sizeof(struct MPI2_IOC_INIT_REQUEST)); req_desc = (union MEGASAS_REQUEST_DESCRIPTOR_UNION *)fusion->req_frames_desc; - req_desc->Words = cmd->frame_phys_addr; + req_desc->Words = 0; req_desc->MFAIo.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_MFA << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); + cpu_to_le32s((u32 *)&req_desc->MFAIo); + req_desc->Words |= cpu_to_le64(cmd->frame_phys_addr); /* * disable the intr before firing the init frame @@ -753,13 +746,13 @@ megasas_get_ld_map_info(struct megasas_instance *instance) dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_READ; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ); dcmd->timeout = 0; dcmd->pad_0 = 0; - dcmd->data_xfer_len = size_map_info; - dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO; - dcmd->sgl.sge32[0].phys_addr = ci_h; - dcmd->sgl.sge32[0].length = size_map_info; + dcmd->data_xfer_len = cpu_to_le32(size_map_info); + dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info); if (!megasas_issue_polled(instance, cmd)) ret = 0; @@ -828,7 +821,7 @@ megasas_sync_map_info(struct megasas_instance *instance) map = fusion->ld_map[instance->map_id & 1]; - num_lds = map->raidMap.ldCount; + num_lds = le32_to_cpu(map->raidMap.ldCount); dcmd = &cmd->frame->dcmd; @@ -856,15 +849,15 @@ megasas_sync_map_info(struct megasas_instance *instance) 
dcmd->cmd = MFI_CMD_DCMD; dcmd->cmd_status = 0xFF; dcmd->sge_count = 1; - dcmd->flags = MFI_FRAME_DIR_WRITE; + dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_WRITE); dcmd->timeout = 0; dcmd->pad_0 = 0; - dcmd->data_xfer_len = size_map_info; + dcmd->data_xfer_len = cpu_to_le32(size_map_info); dcmd->mbox.b[0] = num_lds; dcmd->mbox.b[1] = MEGASAS_DCMD_MBOX_PEND_FLAG; - dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO; - dcmd->sgl.sge32[0].phys_addr = ci_h; - dcmd->sgl.sge32[0].length = size_map_info; + dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO); + dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); + dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info); instance->map_update_cmd = cmd; @@ -1067,9 +1060,8 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance, spin_lock_irqsave(&instance->hba_lock, flags); - writel(req_desc_lo, - &(regs)->inbound_low_queue_port); - writel(req_desc_hi, &(regs)->inbound_high_queue_port); + writel(le32_to_cpu(req_desc_lo), &(regs)->inbound_low_queue_port); + writel(le32_to_cpu(req_desc_hi), &(regs)->inbound_high_queue_port); spin_unlock_irqrestore(&instance->hba_lock, flags); } @@ -1157,8 +1149,8 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, return sge_count; scsi_for_each_sg(scp, os_sgl, sge_count, i) { - sgl_ptr->Length = sg_dma_len(os_sgl); - sgl_ptr->Address = sg_dma_address(os_sgl); + sgl_ptr->Length = cpu_to_le32(sg_dma_len(os_sgl)); + sgl_ptr->Address = cpu_to_le64(sg_dma_address(os_sgl)); sgl_ptr->Flags = 0; if ((instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) || (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) { @@ -1177,9 +1169,9 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, PCI_DEVICE_ID_LSI_INVADER) || (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) { - if ((cmd->io_request->IoFlags & - MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) != - MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) + if ((le16_to_cpu(cmd->io_request->IoFlags) & + MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) != + 
MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) cmd->io_request->ChainOffset = fusion-> chain_offset_io_request; @@ -1201,9 +1193,8 @@ megasas_make_sgl_fusion(struct megasas_instance *instance, sg_chain->Flags = (IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR); - sg_chain->Length = (sizeof(union MPI2_SGE_IO_UNION) - *(sge_count - sg_processed)); - sg_chain->Address = cmd->sg_frame_phys_addr; + sg_chain->Length = cpu_to_le32((sizeof(union MPI2_SGE_IO_UNION) * (sge_count - sg_processed))); + sg_chain->Address = cpu_to_le64(cmd->sg_frame_phys_addr); sgl_ptr = (struct MPI25_IEEE_SGE_CHAIN64 *)cmd->sg_frame; @@ -1261,7 +1252,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, io_request->CDB.EEDP32.PrimaryReferenceTag = cpu_to_be32(ref_tag); io_request->CDB.EEDP32.PrimaryApplicationTagMask = 0xffff; - io_request->IoFlags = 32; /* Specify 32-byte cdb */ + io_request->IoFlags = cpu_to_le16(32); /* Specify 32-byte cdb */ /* Transfer length */ cdb[28] = (u8)((num_blocks >> 24) & 0xff); @@ -1271,19 +1262,19 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, /* set SCSI IO EEDPFlags */ if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) { - io_request->EEDPFlags = + io_request->EEDPFlags = cpu_to_le16( MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG | MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG | MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP | MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG | - MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD; + MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD); } else { - io_request->EEDPFlags = + io_request->EEDPFlags = cpu_to_le16( MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG | - MPI2_SCSIIO_EEDPFLAGS_INSERT_OP; + MPI2_SCSIIO_EEDPFLAGS_INSERT_OP); } - io_request->Control |= (0x4 << 26); - io_request->EEDPBlockSize = scp->device->sector_size; + io_request->Control |= cpu_to_le32((0x4 << 26)); + io_request->EEDPBlockSize = cpu_to_le32(scp->device->sector_size); } else { /* Some drives don't support 16/12 byte CDB's, convert to 10 */ if (((cdb_len == 
12) || (cdb_len == 16)) && @@ -1311,7 +1302,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, cdb[8] = (u8)(num_blocks & 0xff); cdb[7] = (u8)((num_blocks >> 8) & 0xff); - io_request->IoFlags = 10; /* Specify 10-byte cdb */ + io_request->IoFlags = cpu_to_le16(10); /* Specify 10-byte cdb */ cdb_len = 10; } else if ((cdb_len < 16) && (start_blk > 0xffffffff)) { /* Convert to 16 byte CDB for large LBA's */ @@ -1349,7 +1340,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, cdb[11] = (u8)((num_blocks >> 16) & 0xff); cdb[10] = (u8)((num_blocks >> 24) & 0xff); - io_request->IoFlags = 16; /* Specify 16-byte cdb */ + io_request->IoFlags = cpu_to_le16(16); /* Specify 16-byte cdb */ cdb_len = 16; } @@ -1410,13 +1401,14 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, struct IO_REQUEST_INFO io_info; struct fusion_context *fusion; struct MR_FW_RAID_MAP_ALL *local_map_ptr; + u8 *raidLUN; device_id = MEGASAS_DEV_INDEX(instance, scp); fusion = instance->ctrl_context; io_request = cmd->io_request; - io_request->RaidContext.VirtualDiskTgtId = device_id; + io_request->RaidContext.VirtualDiskTgtId = cpu_to_le16(device_id); io_request->RaidContext.status = 0; io_request->RaidContext.exStatus = 0; @@ -1480,7 +1472,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, io_info.ldStartBlock = ((u64)start_lba_hi << 32) | start_lba_lo; io_info.numBlocks = datalength; io_info.ldTgtId = device_id; - io_request->DataLength = scsi_bufflen(scp); + io_request->DataLength = cpu_to_le32(scsi_bufflen(scp)); if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) io_info.isRead = 1; @@ -1494,7 +1486,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, } else { if (MR_BuildRaidContext(instance, &io_info, &io_request->RaidContext, - local_map_ptr)) + local_map_ptr, &raidLUN)) fp_possible = io_info.fpOkForIo; } @@ -1520,8 +1512,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, 
MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.Type = MPI2_TYPE_CUDA; io_request->RaidContext.nseg = 0x1; - io_request->IoFlags |= - MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; + io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); io_request->RaidContext.regLockFlags |= (MR_RL_FLAGS_GRANT_DESTINATION_CUDA | MR_RL_FLAGS_SEQ_NUM_ENABLE); @@ -1537,9 +1528,11 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; cmd->request_desc->SCSIIO.DevHandle = io_info.devHandle; io_request->DevHandle = io_info.devHandle; + /* populate the LUN field */ + memcpy(io_request->LUN, raidLUN, 8); } else { io_request->RaidContext.timeoutValue = - local_map_ptr->raidMap.fpPdIoTimeoutSec; + cpu_to_le16(local_map_ptr->raidMap.fpPdIoTimeoutSec); cmd->request_desc->SCSIIO.RequestFlags = (MEGASAS_REQ_DESCRIPT_FLAGS_LD_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); @@ -1557,7 +1550,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, io_request->RaidContext.nseg = 0x1; } io_request->Function = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST; - io_request->DevHandle = device_id; + io_request->DevHandle = cpu_to_le16(device_id); } /* Not FP */ } @@ -1579,6 +1572,11 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, u16 pd_index = 0; struct MR_FW_RAID_MAP_ALL *local_map_ptr; struct fusion_context *fusion = instance->ctrl_context; + u8 span, physArm; + u16 devHandle; + u32 ld, arRef, pd; + struct MR_LD_RAID *raid; + struct RAID_CONTEXT *pRAID_Context; io_request = cmd->io_request; device_id = MEGASAS_DEV_INDEX(instance, scmd); @@ -1586,6 +1584,9 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, +scmd->device->id; local_map_ptr = fusion->ld_map[(instance->map_id & 1)]; + io_request->DataLength = cpu_to_le32(scsi_bufflen(scmd)); + + /* Check if this is a system PD I/O */ if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS && instance->pd_list[pd_index].driveState == 
MR_PD_STATE_SYSTEM) { @@ -1623,15 +1624,62 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, scmd->request->timeout / HZ; } } else { + if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS) + goto NonFastPath; + + ld = MR_TargetIdToLdGet(device_id, local_map_ptr); + if ((ld >= MAX_LOGICAL_DRIVES) || (!fusion->fast_path_io)) + goto NonFastPath; + + raid = MR_LdRaidGet(ld, local_map_ptr); + + /* check if this LD is FP capable */ + if (!(raid->capability.fpNonRWCapable)) + /* not FP capable, send as non-FP */ + goto NonFastPath; + + /* get RAID_Context pointer */ + pRAID_Context = &io_request->RaidContext; + + /* set RAID context values */ + pRAID_Context->regLockFlags = REGION_TYPE_SHARED_READ; + pRAID_Context->timeoutValue = raid->fpIoTimeoutForLd; + pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); + pRAID_Context->regLockRowLBA = 0; + pRAID_Context->regLockLength = 0; + pRAID_Context->configSeqNum = raid->seqNum; + + /* get the DevHandle for the PD (since this is + fpNonRWCapable, this is a single disk RAID0) */ + span = physArm = 0; + arRef = MR_LdSpanArrayGet(ld, span, local_map_ptr); + pd = MR_ArPdGet(arRef, physArm, local_map_ptr); + devHandle = MR_PdDevHandleGet(pd, local_map_ptr); + + /* build request descriptor */ + cmd->request_desc->SCSIIO.RequestFlags = + (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY << + MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); + cmd->request_desc->SCSIIO.DevHandle = devHandle; + + /* populate the LUN field */ + memcpy(io_request->LUN, raid->LUN, 8); + + /* build the raidScsiIO structure */ + io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; + io_request->DevHandle = devHandle; + + return; + +NonFastPath: io_request->Function = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST; - io_request->DevHandle = device_id; + io_request->DevHandle = cpu_to_le16(device_id); cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); } - io_request->RaidContext.VirtualDiskTgtId = device_id; 
+ io_request->RaidContext.VirtualDiskTgtId = cpu_to_le16(device_id); io_request->LUN[1] = scmd->device->lun; - io_request->DataLength = scsi_bufflen(scmd); } /** @@ -1670,7 +1718,7 @@ megasas_build_io_fusion(struct megasas_instance *instance, * Just the CDB length,rest of the Flags are zero * This will be modified for FP in build_ldio_fusion */ - io_request->IoFlags = scp->cmd_len; + io_request->IoFlags = cpu_to_le16(scp->cmd_len); if (megasas_is_ldio(scp)) megasas_build_ldio_fusion(instance, scp, cmd); @@ -1695,17 +1743,17 @@ megasas_build_io_fusion(struct megasas_instance *instance, io_request->RaidContext.numSGE = sge_count; - io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING; + io_request->SGLFlags = cpu_to_le16(MPI2_SGE_FLAGS_64_BIT_ADDRESSING); if (scp->sc_data_direction == PCI_DMA_TODEVICE) - io_request->Control |= MPI2_SCSIIO_CONTROL_WRITE; + io_request->Control |= cpu_to_le32(MPI2_SCSIIO_CONTROL_WRITE); else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) - io_request->Control |= MPI2_SCSIIO_CONTROL_READ; + io_request->Control |= cpu_to_le32(MPI2_SCSIIO_CONTROL_READ); io_request->SGLOffset0 = offsetof(struct MPI2_RAID_SCSI_IO_REQUEST, SGL) / 4; - io_request->SenseBufferLowAddress = cmd->sense_phys_addr; + io_request->SenseBufferLowAddress = cpu_to_le32(cmd->sense_phys_addr); io_request->SenseBufferLength = SCSI_SENSE_BUFFERSIZE; cmd->scmd = scp; @@ -1770,7 +1818,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance, } req_desc = cmd->request_desc; - req_desc->SCSIIO.SMID = index; + req_desc->SCSIIO.SMID = cpu_to_le16(index); if (cmd->io_request->ChainOffset != 0 && cmd->io_request->ChainOffset != 0xF) @@ -1832,7 +1880,7 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex) num_completed = 0; while ((d_val.u.low != UINT_MAX) && (d_val.u.high != UINT_MAX)) { - smid = reply_desc->SMID; + smid = le16_to_cpu(reply_desc->SMID); cmd_fusion = fusion->cmd_list[smid - 1]; @@ -2050,12 +2098,12 @@ 
build_mpt_mfi_pass_thru(struct megasas_instance *instance, SGL) / 4; io_req->ChainOffset = fusion->chain_offset_mfi_pthru; - mpi25_ieee_chain->Address = mfi_cmd->frame_phys_addr; + mpi25_ieee_chain->Address = cpu_to_le64(mfi_cmd->frame_phys_addr); mpi25_ieee_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR; - mpi25_ieee_chain->Length = MEGASAS_MAX_SZ_CHAIN_FRAME; + mpi25_ieee_chain->Length = cpu_to_le32(MEGASAS_MAX_SZ_CHAIN_FRAME); return 0; } @@ -2088,7 +2136,7 @@ build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd) req_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); - req_desc->SCSIIO.SMID = index; + req_desc->SCSIIO.SMID = cpu_to_le16(index); return req_desc; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index 4eb84011cb07..35a51397b364 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -93,8 +93,13 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { */ struct RAID_CONTEXT { +#if defined(__BIG_ENDIAN_BITFIELD) + u8 nseg:4; + u8 Type:4; +#else u8 Type:4; u8 nseg:4; +#endif u8 resvd0; u16 timeoutValue; u8 regLockFlags; @@ -298,8 +303,13 @@ struct MPI2_RAID_SCSI_IO_REQUEST { * MPT RAID MFA IO Descriptor. 
*/ struct MEGASAS_RAID_MFA_IO_REQUEST_DESCRIPTOR { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 MessageAddress1:24; /* bits 31:8*/ + u32 RequestFlags:8; +#else u32 RequestFlags:8; u32 MessageAddress1:24; /* bits 31:8*/ +#endif u32 MessageAddress2; /* bits 61:32 */ }; @@ -518,6 +528,19 @@ struct MR_SPAN_BLOCK_INFO { struct MR_LD_RAID { struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved4:7; + u32 fpNonRWCapable:1; + u32 fpReadAcrossStripe:1; + u32 fpWriteAcrossStripe:1; + u32 fpReadCapable:1; + u32 fpWriteCapable:1; + u32 encryptionType:8; + u32 pdPiMode:4; + u32 ldPiMode:4; + u32 reserved5:3; + u32 fpCapable:1; +#else u32 fpCapable:1; u32 reserved5:3; u32 ldPiMode:4; @@ -527,7 +550,9 @@ struct MR_LD_RAID { u32 fpReadCapable:1; u32 fpWriteAcrossStripe:1; u32 fpReadAcrossStripe:1; - u32 reserved4:8; + u32 fpNonRWCapable:1; + u32 reserved4:7; +#endif } capability; u32 reserved6; u64 size; @@ -551,7 +576,9 @@ struct MR_LD_RAID { u32 reserved:31; } flags; - u8 reserved3[0x5C]; + u8 LUN[8]; /* 0x24 8 byte LUN field used for SCSI IO's */ + u8 fpIoTimeoutForLd;/*0x2C timeout value used by driver in FP IO*/ + u8 reserved3[0x80-0x2D]; /* 0x2D */ }; struct MR_LD_SPAN_MAP { diff --git a/drivers/scsi/mpt3sas/Makefile b/drivers/scsi/mpt3sas/Makefile index 4c1d2e7a1176..efb0c4c2e310 100644 --- a/drivers/scsi/mpt3sas/Makefile +++ b/drivers/scsi/mpt3sas/Makefile @@ -1,5 +1,5 @@ # mpt3sas makefile -obj-m += mpt3sas.o +obj-$(CONFIG_SCSI_MPT3SAS) += mpt3sas.o mpt3sas-y += mpt3sas_base.o \ mpt3sas_config.o \ mpt3sas_scsih.o \ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ff12d4677cc4..596480022b0a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -10,7 +10,7 @@ * * Forward port and refactoring to modern qla2xxx and target/configfs * - * Copyright (C) 2010-2011 Nicholas A. Bellinger <nab@kernel.org> + * Copyright (C) 2010-2013 Nicholas A. 
Bellinger <nab@kernel.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index a6da313e253b..f85b9e5c1f05 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -2,12 +2,9 @@ * This file contains tcm implementation using v4 configfs fabric infrastructure * for QLogic target mode HBAs * - * ?? Copyright 2010-2011 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * - * Licensed to the Linux Foundation under the General Public License (GPL) - * version 2. - * - * Author: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Author: Nicholas A. Bellinger <nab@daterainc.com> * * tcm_qla2xxx_parse_wwn() and tcm_qla2xxx_format_wwn() contains code from * the TCM_FC / Open-FCoE.org fabric module. @@ -360,6 +357,14 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect; } +static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg) +{ + struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, + struct tcm_qla2xxx_tpg, se_tpg); + + return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only; +} + static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( struct se_portal_group *se_tpg) { @@ -489,38 +494,13 @@ static u32 tcm_qla2xxx_sess_get_index(struct se_session *se_sess) return 0; } -/* - * The LIO target core uses DMA_TO_DEVICE to mean that data is going - * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean - * that data is coming from the target (eg handling a READ). However, - * this is just the opposite of what we have to tell the DMA mapping - * layer -- eg when handling a READ, the HBA will have to DMA the data - * out of memory so it can send it to the initiator, which means we - * need to use DMA_TO_DEVICE when we map the data. 
- */ -static enum dma_data_direction tcm_qla2xxx_mapping_dir(struct se_cmd *se_cmd) -{ - if (se_cmd->se_cmd_flags & SCF_BIDI) - return DMA_BIDIRECTIONAL; - - switch (se_cmd->data_direction) { - case DMA_TO_DEVICE: - return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: - return DMA_TO_DEVICE; - case DMA_NONE: - default: - return DMA_NONE; - } -} - static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; @@ -656,7 +636,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; @@ -680,7 +660,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg = NULL; cmd->sg_cnt = 0; cmd->offset = 0; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (se_cmd->data_direction == DMA_FROM_DEVICE) { @@ -939,11 +919,19 @@ DEF_QLA_TPG_ATTR_BOOL(prod_mode_write_protect); DEF_QLA_TPG_ATTRIB(prod_mode_write_protect); QLA_TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define tcm_qla2xxx_tpg_attrib_s_demo_mode_login_only + */ +DEF_QLA_TPG_ATTR_BOOL(demo_mode_login_only); +DEF_QLA_TPG_ATTRIB(demo_mode_login_only); +QLA_TPG_ATTR(demo_mode_login_only, S_IRUGO | S_IWUSR); + static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = { &tcm_qla2xxx_tpg_attrib_generate_node_acls.attr, &tcm_qla2xxx_tpg_attrib_cache_dynamic_acls.attr, 
&tcm_qla2xxx_tpg_attrib_demo_mode_write_protect.attr, &tcm_qla2xxx_tpg_attrib_prod_mode_write_protect.attr, + &tcm_qla2xxx_tpg_attrib_demo_mode_login_only.attr, NULL, }; @@ -1042,6 +1030,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1; QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1; QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1; + QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1; ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -1736,7 +1725,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = { tcm_qla2xxx_check_demo_write_protect, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_prod_write_protect, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, @@ -1784,7 +1773,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_check_demo_mode_cache = tcm_qla2xxx_check_true, .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_true, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_false, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 9ba075fe9781..329327528a55 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -29,6 +29,7 @@ struct tcm_qla2xxx_tpg_attrib { int cache_dynamic_acls; int demo_mode_write_protect; int prod_mode_write_protect; + int demo_mode_login_only; }; struct tcm_qla2xxx_tpg { 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b58e8f815a00..e62d17d41d4e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2420,14 +2420,9 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) } } - if (modepage == 0x3F) { - sd_printk(KERN_ERR, sdkp, "No Caching mode page " - "present\n"); - goto defaults; - } else if ((buffer[offset] & 0x3f) != modepage) { - sd_printk(KERN_ERR, sdkp, "Got wrong page\n"); - goto defaults; - } + sd_printk(KERN_ERR, sdkp, "No Caching mode page found\n"); + goto defaults; + Page_found: if (modepage == 8) { sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0); diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index bce09a6898c4..721050090520 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -177,6 +177,7 @@ enum { MASK_TASK_RESPONSE = 0xFF00, MASK_RSP_UPIU_RESULT = 0xFFFF, MASK_QUERY_DATA_SEG_LEN = 0xFFFF, + MASK_RSP_UPIU_DATA_SEG_LEN = 0xFFFF, MASK_RSP_EXCEPTION_EVENT = 0x10000, }; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b36ca9a2dfbb..04884d663e4e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -36,9 +36,11 @@ #include <linux/async.h> #include "ufshcd.h" +#include "unipro.h" #define UFSHCD_ENABLE_INTRS (UTP_TRANSFER_REQ_COMPL |\ UTP_TASK_REQ_COMPL |\ + UIC_POWER_MODE |\ UFSHCD_ERROR_MASK) /* UIC command timeout, unit: ms */ #define UIC_CMD_TIMEOUT 500 @@ -56,6 +58,9 @@ /* Expose the flag value from utp_upiu_query.value */ #define MASK_QUERY_UPIU_FLAG_LOC 0xFF +/* Interrupt aggregation default timeout, unit: 40us */ +#define INT_AGGR_DEF_TO 0x02 + enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, @@ -78,12 +83,6 @@ enum { UFSHCD_INT_CLEAR, }; -/* Interrupt aggregation options */ -enum { - INT_AGGR_RESET, - INT_AGGR_CONFIG, -}; - /* * ufshcd_wait_for_register - wait for register value to change * @hba - per-adapter interface @@ -238,6 +237,18 @@ static inline int ufshcd_get_uic_cmd_result(struct ufs_hba *hba) } /** + * 
ufshcd_get_dme_attr_val - Get the value of attribute returned by UIC command + * @hba: Pointer to adapter instance + * + * This function gets UIC command argument3 + * Returns 0 on success, non zero value on error + */ +static inline u32 ufshcd_get_dme_attr_val(struct ufs_hba *hba) +{ + return ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3); +} + +/** * ufshcd_get_req_rsp - returns the TR response transaction type * @ucd_rsp_ptr: pointer to response UPIU */ @@ -260,6 +271,20 @@ ufshcd_get_rsp_upiu_result(struct utp_upiu_rsp *ucd_rsp_ptr) return be32_to_cpu(ucd_rsp_ptr->header.dword_1) & MASK_RSP_UPIU_RESULT; } +/* + * ufshcd_get_rsp_upiu_data_seg_len - Get the data segment length + * from response UPIU + * @ucd_rsp_ptr: pointer to response UPIU + * + * Return the data segment length. + */ +static inline unsigned int +ufshcd_get_rsp_upiu_data_seg_len(struct utp_upiu_rsp *ucd_rsp_ptr) +{ + return be32_to_cpu(ucd_rsp_ptr->header.dword_2) & + MASK_RSP_UPIU_DATA_SEG_LEN; +} + /** * ufshcd_is_exception_event - Check if the device raised an exception event * @ucd_rsp_ptr: pointer to response UPIU @@ -276,30 +301,30 @@ static inline bool ufshcd_is_exception_event(struct utp_upiu_rsp *ucd_rsp_ptr) } /** - * ufshcd_config_int_aggr - Configure interrupt aggregation values. - * Currently there is no use case where we want to configure - * interrupt aggregation dynamically. So to configure interrupt - * aggregation, #define INT_AGGR_COUNTER_THRESHOLD_VALUE and - * INT_AGGR_TIMEOUT_VALUE are used. + * ufshcd_reset_intr_aggr - Reset interrupt aggregation values. 
* @hba: per adapter instance - * @option: Interrupt aggregation option */ static inline void -ufshcd_config_int_aggr(struct ufs_hba *hba, int option) +ufshcd_reset_intr_aggr(struct ufs_hba *hba) { - switch (option) { - case INT_AGGR_RESET: - ufshcd_writel(hba, INT_AGGR_ENABLE | - INT_AGGR_COUNTER_AND_TIMER_RESET, - REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL); - break; - case INT_AGGR_CONFIG: - ufshcd_writel(hba, INT_AGGR_ENABLE | INT_AGGR_PARAM_WRITE | - INT_AGGR_COUNTER_THRESHOLD_VALUE | - INT_AGGR_TIMEOUT_VALUE, - REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL); - break; - } + ufshcd_writel(hba, INT_AGGR_ENABLE | + INT_AGGR_COUNTER_AND_TIMER_RESET, + REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL); +} + +/** + * ufshcd_config_intr_aggr - Configure interrupt aggregation values. + * @hba: per adapter instance + * @cnt: Interrupt aggregation counter threshold + * @tmout: Interrupt aggregation timeout value + */ +static inline void +ufshcd_config_intr_aggr(struct ufs_hba *hba, u8 cnt, u8 tmout) +{ + ufshcd_writel(hba, INT_AGGR_ENABLE | INT_AGGR_PARAM_WRITE | + INT_AGGR_COUNTER_THLD_VAL(cnt) | + INT_AGGR_TIMEOUT_VAL(tmout), + REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL); } /** @@ -355,7 +380,8 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) static inline void ufshcd_copy_sense_data(struct ufshcd_lrb *lrbp) { int len; - if (lrbp->sense_buffer) { + if (lrbp->sense_buffer && + ufshcd_get_rsp_upiu_data_seg_len(lrbp->ucd_rsp_ptr)) { len = be16_to_cpu(lrbp->ucd_rsp_ptr->sr.sense_data_len); memcpy(lrbp->sense_buffer, lrbp->ucd_rsp_ptr->sr.sense_data, @@ -446,6 +472,18 @@ static inline bool ufshcd_ready_for_uic_cmd(struct ufs_hba *hba) } /** + * ufshcd_get_upmcrs - Get the power mode change request status + * @hba: Pointer to adapter instance + * + * This function gets the UPMCRS field of HCS register + * Returns value of UPMCRS field + */ +static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba) +{ + return (ufshcd_readl(hba, REG_CONTROLLER_STATUS) >> 8) & 0x7; +} + +/** * 
ufshcd_dispatch_uic_cmd - Dispatch UIC commands to unipro layers * @hba: per adapter instance * @uic_cmd: UIC command @@ -1362,6 +1400,202 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba) } /** + * ufshcd_dme_set_attr - UIC command for DME_SET, DME_PEER_SET + * @hba: per adapter instance + * @attr_sel: uic command argument1 + * @attr_set: attribute set type as uic command argument2 + * @mib_val: setting value as uic command argument3 + * @peer: indicate whether peer or local + * + * Returns 0 on success, non-zero value on failure + */ +int ufshcd_dme_set_attr(struct ufs_hba *hba, u32 attr_sel, + u8 attr_set, u32 mib_val, u8 peer) +{ + struct uic_command uic_cmd = {0}; + static const char *const action[] = { + "dme-set", + "dme-peer-set" + }; + const char *set = action[!!peer]; + int ret; + + uic_cmd.command = peer ? + UIC_CMD_DME_PEER_SET : UIC_CMD_DME_SET; + uic_cmd.argument1 = attr_sel; + uic_cmd.argument2 = UIC_ARG_ATTR_TYPE(attr_set); + uic_cmd.argument3 = mib_val; + + ret = ufshcd_send_uic_cmd(hba, &uic_cmd); + if (ret) + dev_err(hba->dev, "%s: attr-id 0x%x val 0x%x error code %d\n", + set, UIC_GET_ATTR_ID(attr_sel), mib_val, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(ufshcd_dme_set_attr); + +/** + * ufshcd_dme_get_attr - UIC command for DME_GET, DME_PEER_GET + * @hba: per adapter instance + * @attr_sel: uic command argument1 + * @mib_val: the value of the attribute as returned by the UIC command + * @peer: indicate whether peer or local + * + * Returns 0 on success, non-zero value on failure + */ +int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel, + u32 *mib_val, u8 peer) +{ + struct uic_command uic_cmd = {0}; + static const char *const action[] = { + "dme-get", + "dme-peer-get" + }; + const char *get = action[!!peer]; + int ret; + + uic_cmd.command = peer ? 
+ UIC_CMD_DME_PEER_GET : UIC_CMD_DME_GET; + uic_cmd.argument1 = attr_sel; + + ret = ufshcd_send_uic_cmd(hba, &uic_cmd); + if (ret) { + dev_err(hba->dev, "%s: attr-id 0x%x error code %d\n", + get, UIC_GET_ATTR_ID(attr_sel), ret); + goto out; + } + + if (mib_val) + *mib_val = uic_cmd.argument3; +out: + return ret; +} +EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr); + +/** + * ufshcd_uic_change_pwr_mode - Perform the UIC power mode change + * using DME_SET primitives. + * @hba: per adapter instance + * @mode: power mode value + * + * Returns 0 on success, non-zero value on failure + */ +int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode) +{ + struct uic_command uic_cmd = {0}; + struct completion pwr_done; + unsigned long flags; + u8 status; + int ret; + + uic_cmd.command = UIC_CMD_DME_SET; + uic_cmd.argument1 = UIC_ARG_MIB(PA_PWRMODE); + uic_cmd.argument3 = mode; + init_completion(&pwr_done); + + mutex_lock(&hba->uic_cmd_mutex); + + spin_lock_irqsave(hba->host->host_lock, flags); + hba->pwr_done = &pwr_done; + spin_unlock_irqrestore(hba->host->host_lock, flags); + ret = __ufshcd_send_uic_cmd(hba, &uic_cmd); + if (ret) { + dev_err(hba->dev, + "pwr mode change with mode 0x%x uic error %d\n", + mode, ret); + goto out; + } + + if (!wait_for_completion_timeout(hba->pwr_done, + msecs_to_jiffies(UIC_CMD_TIMEOUT))) { + dev_err(hba->dev, + "pwr mode change with mode 0x%x completion timeout\n", + mode); + ret = -ETIMEDOUT; + goto out; + } + + status = ufshcd_get_upmcrs(hba); + if (status != PWR_LOCAL) { + dev_err(hba->dev, + "pwr mode change failed, host umpcrs:0x%x\n", + status); + ret = (status != PWR_OK) ? status : -1; + } +out: + spin_lock_irqsave(hba->host->host_lock, flags); + hba->pwr_done = NULL; + spin_unlock_irqrestore(hba->host->host_lock, flags); + mutex_unlock(&hba->uic_cmd_mutex); + return ret; +} + +/** + * ufshcd_config_max_pwr_mode - Set & Change power mode with + * maximum capability attribute information. 
+ * @hba: per adapter instance + * + * Returns 0 on success, non-zero value on failure + */ +static int ufshcd_config_max_pwr_mode(struct ufs_hba *hba) +{ + enum {RX = 0, TX = 1}; + u32 lanes[] = {1, 1}; + u32 gear[] = {1, 1}; + u8 pwr[] = {FASTAUTO_MODE, FASTAUTO_MODE}; + int ret; + + /* Get the connected lane count */ + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDRXDATALANES), &lanes[RX]); + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDTXDATALANES), &lanes[TX]); + + /* + * First, get the maximum gears of HS speed. + * If a zero value, it means there is no HSGEAR capability. + * Then, get the maximum gears of PWM speed. + */ + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_MAXRXHSGEAR), &gear[RX]); + if (!gear[RX]) { + ufshcd_dme_get(hba, UIC_ARG_MIB(PA_MAXRXPWMGEAR), &gear[RX]); + pwr[RX] = SLOWAUTO_MODE; + } + + ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_MAXRXHSGEAR), &gear[TX]); + if (!gear[TX]) { + ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_MAXRXPWMGEAR), + &gear[TX]); + pwr[TX] = SLOWAUTO_MODE; + } + + /* + * Configure attributes for power mode change with below. 
+ * - PA_RXGEAR, PA_ACTIVERXDATALANES, PA_RXTERMINATION, + * - PA_TXGEAR, PA_ACTIVETXDATALANES, PA_TXTERMINATION, + * - PA_HSSERIES + */ + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_RXGEAR), gear[RX]); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_ACTIVERXDATALANES), lanes[RX]); + if (pwr[RX] == FASTAUTO_MODE) + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_RXTERMINATION), TRUE); + + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TXGEAR), gear[TX]); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_ACTIVETXDATALANES), lanes[TX]); + if (pwr[TX] == FASTAUTO_MODE) + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TXTERMINATION), TRUE); + + if (pwr[RX] == FASTAUTO_MODE || pwr[TX] == FASTAUTO_MODE) + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HSSERIES), PA_HS_MODE_B); + + ret = ufshcd_uic_change_pwr_mode(hba, pwr[RX] << 4 | pwr[TX]); + if (ret) + dev_err(hba->dev, + "pwr_mode: power mode change failed %d\n", ret); + + return ret; +} + +/** * ufshcd_complete_dev_init() - checks device readiness * hba: per-adapter instance * @@ -1442,7 +1676,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) ufshcd_enable_intr(hba, UFSHCD_ENABLE_INTRS); /* Configure interrupt aggregation */ - ufshcd_config_int_aggr(hba, INT_AGGR_CONFIG); + ufshcd_config_intr_aggr(hba, hba->nutrs - 1, INT_AGGR_DEF_TO); /* Configure UTRL and UTMRL base address registers */ ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), @@ -1788,32 +2022,24 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status) int result = 0; switch (scsi_status) { - case SAM_STAT_GOOD: - result |= DID_OK << 16 | - COMMAND_COMPLETE << 8 | - SAM_STAT_GOOD; - break; case SAM_STAT_CHECK_CONDITION: + ufshcd_copy_sense_data(lrbp); + case SAM_STAT_GOOD: result |= DID_OK << 16 | COMMAND_COMPLETE << 8 | - SAM_STAT_CHECK_CONDITION; - ufshcd_copy_sense_data(lrbp); - break; - case SAM_STAT_BUSY: - result |= SAM_STAT_BUSY; + scsi_status; break; case SAM_STAT_TASK_SET_FULL: - /* * If a LUN reports SAM_STAT_TASK_SET_FULL, then the LUN queue * depth needs to be adjusted to the exact number of 
* outstanding commands the LUN can handle at any given time. */ ufshcd_adjust_lun_qdepth(lrbp->cmd); - result |= SAM_STAT_TASK_SET_FULL; - break; + case SAM_STAT_BUSY: case SAM_STAT_TASK_ABORTED: - result |= SAM_STAT_TASK_ABORTED; + ufshcd_copy_sense_data(lrbp); + result |= scsi_status; break; default: result |= DID_ERROR << 16; @@ -1898,14 +2124,20 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) /** * ufshcd_uic_cmd_compl - handle completion of uic command * @hba: per adapter instance + * @intr_status: interrupt status generated by the controller */ -static void ufshcd_uic_cmd_compl(struct ufs_hba *hba) +static void ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) { - if (hba->active_uic_cmd) { + if ((intr_status & UIC_COMMAND_COMPL) && hba->active_uic_cmd) { hba->active_uic_cmd->argument2 |= ufshcd_get_uic_cmd_result(hba); + hba->active_uic_cmd->argument3 = + ufshcd_get_dme_attr_val(hba); complete(&hba->active_uic_cmd->done); } + + if ((intr_status & UIC_POWER_MODE) && hba->pwr_done) + complete(hba->pwr_done); } /** @@ -1960,7 +2192,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba) /* Reset interrupt aggregation counters */ if (int_aggr_reset) - ufshcd_config_int_aggr(hba, INT_AGGR_RESET); + ufshcd_reset_intr_aggr(hba); } /** @@ -2251,8 +2483,8 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status) if (hba->errors) ufshcd_err_handler(hba); - if (intr_status & UIC_COMMAND_COMPL) - ufshcd_uic_cmd_compl(hba); + if (intr_status & UFSHCD_UIC_MASK) + ufshcd_uic_cmd_compl(hba, intr_status); if (intr_status & UTP_TASK_REQ_COMPL) ufshcd_tmc_handler(hba); @@ -2494,6 +2726,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie) if (ret) goto out; + ufshcd_config_max_pwr_mode(hba); + ret = ufshcd_verify_dev_init(hba); if (ret) goto out; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 59c9c4848be1..577679a2d189 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ 
b/drivers/scsi/ufs/ufshcd.h @@ -175,6 +175,7 @@ struct ufs_dev_cmd { * @active_uic_cmd: handle of active UIC command * @uic_cmd_mutex: mutex for uic command * @ufshcd_tm_wait_queue: wait queue for task management + * @pwr_done: completion for power mode change * @tm_condition: condition variable for task management * @ufshcd_state: UFSHCD states * @intr_mask: Interrupt Mask Bits @@ -219,6 +220,8 @@ struct ufs_hba { wait_queue_head_t ufshcd_tm_wait_queue; unsigned long tm_condition; + struct completion *pwr_done; + u32 ufshcd_state; u32 intr_mask; u16 ee_ctrl_mask; @@ -263,4 +266,55 @@ static inline void check_upiu_size(void) extern int ufshcd_runtime_suspend(struct ufs_hba *hba); extern int ufshcd_runtime_resume(struct ufs_hba *hba); extern int ufshcd_runtime_idle(struct ufs_hba *hba); +extern int ufshcd_dme_set_attr(struct ufs_hba *hba, u32 attr_sel, + u8 attr_set, u32 mib_val, u8 peer); +extern int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel, + u32 *mib_val, u8 peer); + +/* UIC command interfaces for DME primitives */ +#define DME_LOCAL 0 +#define DME_PEER 1 +#define ATTR_SET_NOR 0 /* NORMAL */ +#define ATTR_SET_ST 1 /* STATIC */ + +static inline int ufshcd_dme_set(struct ufs_hba *hba, u32 attr_sel, + u32 mib_val) +{ + return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_NOR, + mib_val, DME_LOCAL); +} + +static inline int ufshcd_dme_st_set(struct ufs_hba *hba, u32 attr_sel, + u32 mib_val) +{ + return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_ST, + mib_val, DME_LOCAL); +} + +static inline int ufshcd_dme_peer_set(struct ufs_hba *hba, u32 attr_sel, + u32 mib_val) +{ + return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_NOR, + mib_val, DME_PEER); +} + +static inline int ufshcd_dme_peer_st_set(struct ufs_hba *hba, u32 attr_sel, + u32 mib_val) +{ + return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_ST, + mib_val, DME_PEER); +} + +static inline int ufshcd_dme_get(struct ufs_hba *hba, + u32 attr_sel, u32 *mib_val) +{ + return ufshcd_dme_get_attr(hba, attr_sel, 
mib_val, DME_LOCAL); +} + +static inline int ufshcd_dme_peer_get(struct ufs_hba *hba, + u32 attr_sel, u32 *mib_val) +{ + return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_PEER); +} + #endif /* End of Header */ diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index f1e1b7459107..0475c6619a68 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -124,6 +124,9 @@ enum { #define CONTROLLER_FATAL_ERROR UFS_BIT(16) #define SYSTEM_BUS_FATAL_ERROR UFS_BIT(17) +#define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL |\ + UIC_POWER_MODE) + #define UFSHCD_ERROR_MASK (UIC_ERROR |\ DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ @@ -142,6 +145,15 @@ enum { #define DEVICE_ERROR_INDICATOR UFS_BIT(5) #define UIC_POWER_MODE_CHANGE_REQ_STATUS_MASK UFS_MASK(0x7, 8) +enum { + PWR_OK = 0x0, + PWR_LOCAL = 0x01, + PWR_REMOTE = 0x02, + PWR_BUSY = 0x03, + PWR_ERROR_CAP = 0x04, + PWR_FATAL_ERROR = 0x05, +}; + /* HCE - Host Controller Enable 34h */ #define CONTROLLER_ENABLE UFS_BIT(0) #define CONTROLLER_DISABLE 0x0 @@ -191,6 +203,12 @@ enum { #define CONFIG_RESULT_CODE_MASK 0xFF #define GENERIC_ERROR_CODE_MASK 0xFF +#define UIC_ARG_MIB_SEL(attr, sel) ((((attr) & 0xFFFF) << 16) |\ + ((sel) & 0xFFFF)) +#define UIC_ARG_MIB(attr) UIC_ARG_MIB_SEL(attr, 0) +#define UIC_ARG_ATTR_TYPE(t) (((t) & 0xFF) << 16) +#define UIC_GET_ATTR_ID(v) (((v) >> 16) & 0xFFFF) + /* UIC Commands */ enum { UIC_CMD_DME_GET = 0x01, @@ -226,8 +244,8 @@ enum { #define MASK_UIC_COMMAND_RESULT 0xFF -#define INT_AGGR_COUNTER_THRESHOLD_VALUE (0x1F << 8) -#define INT_AGGR_TIMEOUT_VALUE (0x02) +#define INT_AGGR_COUNTER_THLD_VAL(c) (((c) & 0x1F) << 8) +#define INT_AGGR_TIMEOUT_VAL(t) (((t) & 0xFF) << 0) /* Interrupt disable masks */ enum { diff --git a/drivers/scsi/ufs/unipro.h b/drivers/scsi/ufs/unipro.h new file mode 100644 index 000000000000..0bb8041c047a --- /dev/null +++ b/drivers/scsi/ufs/unipro.h @@ -0,0 +1,151 @@ +/* + * drivers/scsi/ufs/unipro.h + * + * Copyright (C) 2013 Samsung Electronics 
Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _UNIPRO_H_ +#define _UNIPRO_H_ + +/* + * PHY Adapter attributes + */ +#define PA_ACTIVETXDATALANES 0x1560 +#define PA_ACTIVERXDATALANES 0x1580 +#define PA_TXTRAILINGCLOCKS 0x1564 +#define PA_PHY_TYPE 0x1500 +#define PA_AVAILTXDATALANES 0x1520 +#define PA_AVAILRXDATALANES 0x1540 +#define PA_MINRXTRAILINGCLOCKS 0x1543 +#define PA_TXPWRSTATUS 0x1567 +#define PA_RXPWRSTATUS 0x1582 +#define PA_TXFORCECLOCK 0x1562 +#define PA_TXPWRMODE 0x1563 +#define PA_LEGACYDPHYESCDL 0x1570 +#define PA_MAXTXSPEEDFAST 0x1521 +#define PA_MAXTXSPEEDSLOW 0x1522 +#define PA_MAXRXSPEEDFAST 0x1541 +#define PA_MAXRXSPEEDSLOW 0x1542 +#define PA_TXLINKSTARTUPHS 0x1544 +#define PA_TXSPEEDFAST 0x1565 +#define PA_TXSPEEDSLOW 0x1566 +#define PA_REMOTEVERINFO 0x15A0 +#define PA_TXGEAR 0x1568 +#define PA_TXTERMINATION 0x1569 +#define PA_HSSERIES 0x156A +#define PA_PWRMODE 0x1571 +#define PA_RXGEAR 0x1583 +#define PA_RXTERMINATION 0x1584 +#define PA_MAXRXPWMGEAR 0x1586 +#define PA_MAXRXHSGEAR 0x1587 +#define PA_RXHSUNTERMCAP 0x15A5 +#define PA_RXLSTERMCAP 0x15A6 +#define PA_PACPREQTIMEOUT 0x1590 +#define PA_PACPREQEOBTIMEOUT 0x1591 +#define PA_HIBERN8TIME 0x15A7 +#define PA_LOCALVERINFO 0x15A9 +#define PA_TACTIVATE 0x15A8 +#define PA_PACPFRAMECOUNT 0x15C0 +#define PA_PACPERRORCOUNT 0x15C1 +#define PA_PHYTESTCONTROL 0x15C2 +#define PA_PWRMODEUSERDATA0 0x15B0 +#define PA_PWRMODEUSERDATA1 0x15B1 +#define PA_PWRMODEUSERDATA2 0x15B2 +#define PA_PWRMODEUSERDATA3 0x15B3 +#define PA_PWRMODEUSERDATA4 0x15B4 +#define PA_PWRMODEUSERDATA5 0x15B5 +#define PA_PWRMODEUSERDATA6 0x15B6 +#define PA_PWRMODEUSERDATA7 0x15B7 +#define PA_PWRMODEUSERDATA8 0x15B8 +#define PA_PWRMODEUSERDATA9 0x15B9 +#define PA_PWRMODEUSERDATA10 0x15BA +#define 
PA_PWRMODEUSERDATA11 0x15BB +#define PA_CONNECTEDTXDATALANES 0x1561 +#define PA_CONNECTEDRXDATALANES 0x1581 +#define PA_LOGICALLANEMAP 0x15A1 +#define PA_SLEEPNOCONFIGTIME 0x15A2 +#define PA_STALLNOCONFIGTIME 0x15A3 +#define PA_SAVECONFIGTIME 0x15A4 + +/* PA power modes */ +enum { + FAST_MODE = 1, + SLOW_MODE = 2, + FASTAUTO_MODE = 4, + SLOWAUTO_MODE = 5, + UNCHANGED = 7, +}; + +/* PA TX/RX Frequency Series */ +enum { + PA_HS_MODE_A = 1, + PA_HS_MODE_B = 2, +}; + +/* + * Data Link Layer Attributes + */ +#define DL_TC0TXFCTHRESHOLD 0x2040 +#define DL_FC0PROTTIMEOUTVAL 0x2041 +#define DL_TC0REPLAYTIMEOUTVAL 0x2042 +#define DL_AFC0REQTIMEOUTVAL 0x2043 +#define DL_AFC0CREDITTHRESHOLD 0x2044 +#define DL_TC0OUTACKTHRESHOLD 0x2045 +#define DL_TC1TXFCTHRESHOLD 0x2060 +#define DL_FC1PROTTIMEOUTVAL 0x2061 +#define DL_TC1REPLAYTIMEOUTVAL 0x2062 +#define DL_AFC1REQTIMEOUTVAL 0x2063 +#define DL_AFC1CREDITTHRESHOLD 0x2064 +#define DL_TC1OUTACKTHRESHOLD 0x2065 +#define DL_TXPREEMPTIONCAP 0x2000 +#define DL_TC0TXMAXSDUSIZE 0x2001 +#define DL_TC0RXINITCREDITVAL 0x2002 +#define DL_TC0TXBUFFERSIZE 0x2005 +#define DL_PEERTC0PRESENT 0x2046 +#define DL_PEERTC0RXINITCREVAL 0x2047 +#define DL_TC1TXMAXSDUSIZE 0x2003 +#define DL_TC1RXINITCREDITVAL 0x2004 +#define DL_TC1TXBUFFERSIZE 0x2006 +#define DL_PEERTC1PRESENT 0x2066 +#define DL_PEERTC1RXINITCREVAL 0x2067 + +/* + * Network Layer Attributes + */ +#define N_DEVICEID 0x3000 +#define N_DEVICEID_VALID 0x3001 +#define N_TC0TXMAXSDUSIZE 0x3020 +#define N_TC1TXMAXSDUSIZE 0x3021 + +/* + * Transport Layer Attributes + */ +#define T_NUMCPORTS 0x4000 +#define T_NUMTESTFEATURES 0x4001 +#define T_CONNECTIONSTATE 0x4020 +#define T_PEERDEVICEID 0x4021 +#define T_PEERCPORTID 0x4022 +#define T_TRAFFICCLASS 0x4023 +#define T_PROTOCOLID 0x4024 +#define T_CPORTFLAGS 0x4025 +#define T_TXTOKENVALUE 0x4026 +#define T_RXTOKENVALUE 0x4027 +#define T_LOCALBUFFERSPACE 0x4028 +#define T_PEERBUFFERSPACE 0x4029 +#define T_CREDITSTOSEND 0x402A +#define T_CPORTMODE 
0x402B +#define T_TC0TXMAXSDUSIZE 0x4060 +#define T_TC1TXMAXSDUSIZE 0x4061 + +/* Boolean attribute values */ +enum { + FALSE = 0, + TRUE, +}; + +#endif /* _UNIPRO_H_ */ diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 0170d4c4a8a3..b9c53cc40e1f 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -55,7 +55,6 @@ comment "SPI Master Controller Drivers" config SPI_ALTERA tristate "Altera SPI Controller" - depends on GENERIC_HARDIRQS select SPI_BITBANG help This is the driver for the Altera SPI Controller. @@ -358,7 +357,7 @@ config SPI_PXA2XX_DMA config SPI_PXA2XX tristate "PXA2xx SSP SPI master" - depends on (ARCH_PXA || PCI || ACPI) && GENERIC_HARDIRQS + depends on (ARCH_PXA || PCI || ACPI) select PXA_SSP if ARCH_PXA help This enables using a PXA2xx or Sodaville SSP port as a SPI master diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 21a3f7250531..8e76ddca0999 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -341,27 +341,26 @@ out: /* * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab * - * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how - * many objects (pages) we have in total. + * 'nr_to_scan' is the number of objects to scan for freeing. * * 'gfp_mask' is the mask of the allocation that got us into this mess. * - * Return value is the number of objects (pages) remaining, or -1 if we cannot + * Return value is the number of objects freed or -1 if we cannot * proceed without risk of deadlock (due to gfp_mask). * * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan' * pages freed. 
*/ -static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) +static unsigned long +ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct ashmem_range *range, *next; + unsigned long freed = 0; /* We might recurse into filesystem code, so bail out if necessary */ - if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) - return -1; - if (!sc->nr_to_scan) - return lru_count; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; mutex_lock(&ashmem_mutex); list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { @@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) range->purged = ASHMEM_WAS_PURGED; lru_del(range); - sc->nr_to_scan -= range_size(range); - if (sc->nr_to_scan <= 0) + freed += range_size(range); + if (--sc->nr_to_scan <= 0) break; } mutex_unlock(&ashmem_mutex); + return freed; +} +static unsigned long +ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + /* + * note that lru_count is count of pages on the lru, not a count of + * objects on the list. This means the scan function needs to return the + * number of pages freed, not the number of objects scanned. 
+ */ return lru_count; } static struct shrinker ashmem_shrinker = { - .shrink = ashmem_shrink, + .count_objects = ashmem_shrink_count, + .scan_objects = ashmem_shrink_scan, + /* + * XXX (dchinner): I wish people would comment on why they need on + * significant changes to the default value here + */ .seeks = DEFAULT_SEEKS * 4, }; @@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (capable(CAP_SYS_ADMIN)) { struct shrink_control sc = { .gfp_mask = GFP_KERNEL, - .nr_to_scan = 0, + .nr_to_scan = LONG_MAX, }; - ret = ashmem_shrink(&ashmem_shrinker, &sc); - sc.nr_to_scan = ret; - ashmem_shrink(&ashmem_shrinker, &sc); + + nodes_setall(sc.nodes_to_scan); + ashmem_shrink_scan(&ashmem_shrinker, &sc); } break; } diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index a8c344422a77..d42f5785f098 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -481,7 +481,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.sec = now.tv_sec; header.nsec = now.tv_nsec; header.euid = current_euid(); - header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD); + header.len = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index fe74494868ef..6f094b37f1f1 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout; pr_info(x); \ } while (0) -static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) +static unsigned long lowmem_count(struct shrinker *s, + struct shrink_control *sc) +{ + return global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_ANON) + + 
global_page_state(NR_INACTIVE_FILE); +} + +static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; - int rem = 0; + unsigned long rem = 0; int tasksize; int i; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; @@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) break; } } - if (sc->nr_to_scan > 0) - lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", - sc->nr_to_scan, sc->gfp_mask, other_free, - other_file, min_score_adj); - rem = global_page_state(NR_ACTIVE_ANON) + - global_page_state(NR_ACTIVE_FILE) + - global_page_state(NR_INACTIVE_ANON) + - global_page_state(NR_INACTIVE_FILE); - if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { - lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", - sc->nr_to_scan, sc->gfp_mask, rem); - return rem; + + lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", + sc->nr_to_scan, sc->gfp_mask, other_free, + other_file, min_score_adj); + + if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { + lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", + sc->nr_to_scan, sc->gfp_mask); + return 0; } + selected_oom_score_adj = min_score_adj; rcu_read_lock(); @@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); - rem -= selected_tasksize; + rem += selected_tasksize; } - lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", + + lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; } static struct shrinker lowmem_shrinker = { - .shrink = lowmem_shrink, + .scan_objects = lowmem_scan, + .count_objects = lowmem_count, .seeks = DEFAULT_SEEKS * 16 }; diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h 
b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h index 63efb7b456c6..2af15d41e77a 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h @@ -79,42 +79,4 @@ do { __oldfs = get_fs(); set_fs(get_ds());} while(0) #define MMSPACE_CLOSE set_fs(__oldfs) -/* - * Shrinker - */ - -# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask) \ - struct shrinker *shrinker, \ - struct shrink_control *sc -# define shrink_param(sc, var) ((sc)->var) - -typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)); - -static inline -struct shrinker *set_shrinker(int seek, shrinker_t func) -{ - struct shrinker *s; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return (NULL); - - s->shrink = func; - s->seeks = seek; - - register_shrinker(s); - - return s; -} - -static inline -void remove_shrinker(struct shrinker *shrinker) -{ - if (shrinker == NULL) - return; - - unregister_shrinker(shrinker); - kfree(shrinker); -} - #endif /* __LINUX_CFS_MEM_H__ */ diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c index 454027d68d54..0025ee6356da 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c @@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) { struct ldlm_namespace *ns; - int canceled = 0, unused; + int unused; ns = ldlm_pl2ns(pl); @@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, unused = ns->ns_nr_unused; spin_unlock(&ns->ns_lock); - if (nr) { - canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC, - LDLM_CANCEL_SHRINK); - } - /* - * Return the number of potentially reclaimable locks. 
- */ - return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure; + if (nr == 0) + return (unused / 100) * sysctl_vfs_cache_pressure; + else + return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK); } struct ldlm_pool_ops ldlm_srv_pool_ops = { @@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl) return recalc_interval_sec; } -/** +/* * Pool shrink wrapper. Will call either client or server pool recalc callback - * depending what pool \a pl is used. + * depending what pool pl is used. When nr == 0, just return the number of + * freeable locks. Otherwise, return the number of canceled locks. */ int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, unsigned int gfp_mask) @@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl) } static struct ptlrpc_thread *ldlm_pools_thread; -static struct shrinker *ldlm_pools_srv_shrinker; -static struct shrinker *ldlm_pools_cli_shrinker; static struct completion ldlm_pools_comp; /* - * Cancel \a nr locks from all namespaces (if possible). Returns number of - * cached locks after shrink is finished. All namespaces are asked to - * cancel approximately equal amount of locks to keep balancing. + * count locks from all namespaces (if possible). Returns number of + * cached locks. */ -static int ldlm_pools_shrink(ldlm_side_t client, int nr, - unsigned int gfp_mask) +static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask) { - int total = 0, cached = 0, nr_ns; + int total = 0, nr_ns; struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; /* loop detection */ void *cookie; - if (client == LDLM_NAMESPACE_CLIENT && nr != 0 && - !(gfp_mask & __GFP_FS)) - return -1; + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return 0; - CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n", - nr, client == LDLM_NAMESPACE_CLIENT ? 
"client" : "server"); + CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", + client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); cookie = cl_env_reenter(); @@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, * Find out how many resources we may release. */ for (nr_ns = ldlm_namespace_nr_read(client); - nr_ns > 0; nr_ns--) - { + nr_ns > 0; nr_ns--) { mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); @@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, ldlm_namespace_put(ns); } - if (nr == 0 || total == 0) { - cl_env_reexit(cookie); - return total; - } + cl_env_reexit(cookie); + return total; +} + +static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask) +{ + unsigned long freed = 0; + int tmp, nr_ns; + struct ldlm_namespace *ns; + void *cookie; + + if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) + return -1; + + cookie = cl_env_reenter(); /* - * Shrink at least ldlm_namespace_nr(client) namespaces. + * Shrink at least ldlm_namespace_nr_read(client) namespaces. */ - for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns; - nr_ns > 0; nr_ns--) - { + for (tmp = nr_ns = ldlm_namespace_nr_read(client); + tmp > 0; tmp--) { int cancel, nr_locks; /* @@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_lock(ldlm_namespace_lock(client)); if (list_empty(ldlm_namespace_list(client))) { mutex_unlock(ldlm_namespace_lock(client)); - /* - * If list is empty, we can't return any @cached > 0, - * that probably would cause needless shrinker - * call. 
- */ - cached = 0; break; } ns = ldlm_namespace_first_locked(client); @@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, mutex_unlock(ldlm_namespace_lock(client)); nr_locks = ldlm_pool_granted(&ns->ns_pool); - cancel = 1 + nr_locks * nr / total; - ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); - cached += ldlm_pool_granted(&ns->ns_pool); + /* + * We use to shrink propotionally but with new shrinker API, + * we lost the total number of freeable locks. + */ + cancel = 1 + min_t(int, nr_locks, nr / nr_ns); + freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); ldlm_namespace_put(ns); } cl_env_reexit(cookie); - /* we only decrease the SLV in server pools shrinker, return -1 to - * kernel to avoid needless loop. LU-1128 */ - return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached; + /* + * we only decrease the SLV in server pools shrinker, return + * SHRINK_STOP to kernel to avoid needless loop. LU-1128 + */ + return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed; +} + +static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc) +{ + return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask); } -static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan, + sc->gfp_mask); } -static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc) { - return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, - shrink_param(sc, nr_to_scan), - shrink_param(sc, gfp_mask)); + return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); +} + +static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc) +{ + return 
ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, + sc->gfp_mask); } int ldlm_pools_recalc(ldlm_side_t client) @@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client) } /* - * Recalc at least ldlm_namespace_nr(client) namespaces. + * Recalc at least ldlm_namespace_nr_read(client) namespaces. */ for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { int skip; @@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void) ldlm_pools_thread = NULL; } +static struct shrinker ldlm_pools_srv_shrinker = { + .count_objects = ldlm_pools_srv_count, + .scan_objects = ldlm_pools_srv_scan, + .seeks = DEFAULT_SEEKS, +}; + +static struct shrinker ldlm_pools_cli_shrinker = { + .count_objects = ldlm_pools_cli_count, + .scan_objects = ldlm_pools_cli_scan, + .seeks = DEFAULT_SEEKS, +}; + int ldlm_pools_init(void) { int rc; rc = ldlm_pools_thread_start(); if (rc == 0) { - ldlm_pools_srv_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_srv_shrink); - ldlm_pools_cli_shrinker = - set_shrinker(DEFAULT_SEEKS, - ldlm_pools_cli_shrink); + register_shrinker(&ldlm_pools_srv_shrinker); + register_shrinker(&ldlm_pools_cli_shrinker); } return rc; } @@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init); void ldlm_pools_fini(void) { - if (ldlm_pools_srv_shrinker != NULL) { - remove_shrinker(ldlm_pools_srv_shrinker); - ldlm_pools_srv_shrinker = NULL; - } - if (ldlm_pools_cli_shrinker != NULL) { - remove_shrinker(ldlm_pools_cli_shrinker); - ldlm_pools_cli_shrinker = NULL; - } + unregister_shrinker(&ldlm_pools_srv_shrinker); + unregister_shrinker(&ldlm_pools_cli_shrinker); ldlm_pools_thread_stop(); } EXPORT_SYMBOL(ldlm_pools_fini); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 253f02688f4f..bc534db12431 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -1009,7 +1009,7 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, local_iov->iov_len = 
count; init_sync_kiocb(kiocb, file); kiocb->ki_pos = *ppos; - kiocb->ki_left = count; + kiocb->ki_nbytes = count; result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos); *ppos = kiocb->ki_pos; @@ -1068,7 +1068,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, local_iov->iov_len = count; init_sync_kiocb(kiocb, file); kiocb->ki_pos = *ppos; - kiocb->ki_left = count; + kiocb->ki_nbytes = count; result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos); *ppos = kiocb->ki_pos; diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index c29ac1c2defd..3a3d5bc5a628 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags, } EXPORT_SYMBOL(lu_env_refill_by_tags); -static struct shrinker *lu_site_shrinker = NULL; typedef struct lu_site_stats{ unsigned lss_populated; @@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs, * objects without taking the lu_sites_guard lock, but this is not * possible in the current implementation. */ -static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long lu_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc) { lu_site_stats_t stats; struct lu_site *s; struct lu_site *tmp; - int cached = 0; - int remain = shrink_param(sc, nr_to_scan); - LIST_HEAD(splice); - - if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) { - if (remain != 0) - return -1; - else - /* We must not take the lu_sites_guard lock when - * __GFP_FS is *not* set because of the deadlock - * possibility detailed above. Additionally, - * since we cannot determine the number of - * objects in the cache without taking this - * lock, we're in a particularly tough spot. As - * a result, we'll just lie and say our cache is - * empty. 
This _should_ be ok, as we can't - * reclaim objects when __GFP_FS is *not* set - * anyways. - */ - return 0; - } + unsigned long cached = 0; - CDEBUG(D_INODE, "Shrink %d objects\n", remain); + if (!(sc->gfp_mask & __GFP_FS)) + return 0; mutex_lock(&lu_sites_guard); list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { - if (shrink_param(sc, nr_to_scan) != 0) { - remain = lu_site_purge(&lu_shrink_env, s, remain); - /* - * Move just shrunk site to the tail of site list to - * assure shrinking fairness. - */ - list_move_tail(&s->ls_linkage, &splice); - } - memset(&stats, 0, sizeof(stats)); lu_site_stats_get(s->ls_obj_hash, &stats, 0); cached += stats.lss_total - stats.lss_busy; - if (shrink_param(sc, nr_to_scan) && remain <= 0) - break; } - list_splice(&splice, lu_sites.prev); mutex_unlock(&lu_sites_guard); cached = (cached / 100) * sysctl_vfs_cache_pressure; - if (shrink_param(sc, nr_to_scan) == 0) - CDEBUG(D_INODE, "%d objects cached\n", cached); + CDEBUG(D_INODE, "%ld objects cached\n", cached); return cached; } +static unsigned long lu_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc) +{ + struct lu_site *s; + struct lu_site *tmp; + unsigned long remain = sc->nr_to_scan, freed = 0; + LIST_HEAD(splice); + + if (!(sc->gfp_mask & __GFP_FS)) + /* We must not take the lu_sites_guard lock when + * __GFP_FS is *not* set because of the deadlock + * possibility detailed above. Additionally, + * since we cannot determine the number of + * objects in the cache without taking this + * lock, we're in a particularly tough spot. As + * a result, we'll just lie and say our cache is + * empty. This _should_ be ok, as we can't + * reclaim objects when __GFP_FS is *not* set + * anyways. 
+ */ + return SHRINK_STOP; + + mutex_lock(&lu_sites_guard); + list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { + freed = lu_site_purge(&lu_shrink_env, s, remain); + remain -= freed; + /* + * Move just shrunk site to the tail of site list to + * assure shrinking fairness. + */ + list_move_tail(&s->ls_linkage, &splice); + } + list_splice(&splice, lu_sites.prev); + mutex_unlock(&lu_sites_guard); + + return sc->nr_to_scan - remain; +} + /* * Debugging stuff. */ @@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env, return 0; } +static struct shrinker lu_site_shrinker = { + .count_objects = lu_cache_shrink_count, + .scan_objects = lu_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + /** * Initialization of global lu_* data. */ @@ -1947,9 +1959,7 @@ int lu_global_init(void) * inode, one for ea. Unfortunately setting this high value results in * lu_object/inode cache consuming all the memory. */ - lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink); - if (lu_site_shrinker == NULL) - return -ENOMEM; + register_shrinker(&lu_site_shrinker); return result; } @@ -1959,11 +1969,7 @@ int lu_global_init(void) */ void lu_global_fini(void) { - if (lu_site_shrinker != NULL) { - remove_shrinker(lu_site_shrinker); - lu_site_shrinker = NULL; - } - + unregister_shrinker(&lu_site_shrinker); lu_context_key_degister(&lu_global_key); /* diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index 9013745ab105..e90c8fb7da6a 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -121,13 +121,6 @@ static struct ptlrpc_enc_page_pool { } page_pools; /* - * memory shrinker - */ -const int pools_shrinker_seeks = DEFAULT_SEEKS; -static struct shrinker *pools_shrinker = NULL; - - -/* * /proc/fs/lustre/sptlrpc/encrypt_page_pools */ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) @@ -226,30 +219,46 @@ static void 
enc_pools_release_free_pages(long npages) } /* - * could be called frequently for query (@nr_to_scan == 0). * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. */ -static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +static unsigned long enc_pools_shrink_count(struct shrinker *s, + struct shrink_control *sc) { - if (unlikely(shrink_param(sc, nr_to_scan) != 0)) { + /* + * if no pool access for a long time, we consider it's fully idle. + * a little race here is fine. + */ + if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access > + CACHE_QUIESCENT_PERIOD)) { spin_lock(&page_pools.epp_lock); - shrink_param(sc, nr_to_scan) = min_t(unsigned long, - shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages - - PTLRPC_MAX_BRW_PAGES); - if (shrink_param(sc, nr_to_scan) > 0) { - enc_pools_release_free_pages(shrink_param(sc, - nr_to_scan)); - CDEBUG(D_SEC, "released %ld pages, %ld left\n", - (long)shrink_param(sc, nr_to_scan), - page_pools.epp_free_pages); - - page_pools.epp_st_shrinks++; - page_pools.epp_last_shrink = cfs_time_current_sec(); - } + page_pools.epp_idle_idx = IDLE_IDX_MAX; spin_unlock(&page_pools.epp_lock); } + LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); + return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * + (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; +} + +/* + * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. 
+ */ +static unsigned long enc_pools_shrink_scan(struct shrinker *s, + struct shrink_control *sc) +{ + spin_lock(&page_pools.epp_lock); + sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, + page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); + if (sc->nr_to_scan > 0) { + enc_pools_release_free_pages(sc->nr_to_scan); + CDEBUG(D_SEC, "released %ld pages, %ld left\n", + (long)sc->nr_to_scan, page_pools.epp_free_pages); + + page_pools.epp_st_shrinks++; + page_pools.epp_last_shrink = cfs_time_current_sec(); + } + spin_unlock(&page_pools.epp_lock); + /* * if no pool access for a long time, we consider it's fully idle. * a little race here is fine. @@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) } LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); - return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * - (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; + return sc->nr_to_scan; } static inline @@ -699,6 +707,12 @@ static inline void enc_pools_free(void) sizeof(*page_pools.epp_pools)); } +static struct shrinker pools_shrinker = { + .count_objects = enc_pools_shrink_count, + .scan_objects = enc_pools_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + int sptlrpc_enc_pool_init(void) { /* @@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void) if (page_pools.epp_pools == NULL) return -ENOMEM; - pools_shrinker = set_shrinker(pools_shrinker_seeks, - enc_pools_shrink); - if (pools_shrinker == NULL) { - enc_pools_free(); - return -ENOMEM; - } + register_shrinker(&pools_shrinker); return 0; } @@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void) { unsigned long cleaned, npools; - LASSERT(pools_shrinker); LASSERT(page_pools.epp_pools); LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); - remove_shrinker(pools_shrinker); + unregister_shrinker(&pools_shrinker); npools = npages_to_npools(page_pools.epp_total_pages); cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); diff --git 
a/drivers/staging/octeon/ethernet-mem.c b/drivers/staging/octeon/ethernet-mem.c index 78b6cb743769..199059d64c9b 100644 --- a/drivers/staging/octeon/ethernet-mem.c +++ b/drivers/staging/octeon/ethernet-mem.c @@ -48,13 +48,8 @@ static int cvm_oct_fill_hw_skbuff(int pool, int size, int elements) while (freed) { struct sk_buff *skb = dev_alloc_skb(size + 256); - if (unlikely(skb == NULL)) { - pr_warning - ("Failed to allocate skb for hardware pool %d\n", - pool); + if (unlikely(skb == NULL)) break; - } - skb_reserve(skb, 256 - (((unsigned long)skb->data) & 0x7f)); *(struct sk_buff **)(skb->data - sizeof(void *)) = skb; cvmx_fpa_free(skb->data, pool, DONT_WRITEBACK(size / 128)); diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c index d8f5f694ec35..ea53af30dfa7 100644 --- a/drivers/staging/octeon/ethernet-rgmii.c +++ b/drivers/staging/octeon/ethernet-rgmii.c @@ -373,9 +373,7 @@ int cvm_oct_rgmii_init(struct net_device *dev) * Enable interrupts on inband status changes * for this port. 
*/ - gmx_rx_int_en.u64 = - cvmx_read_csr(CVMX_GMXX_RXX_INT_EN - (index, interface)); + gmx_rx_int_en.u64 = 0; gmx_rx_int_en.s.phy_dupx = 1; gmx_rx_int_en.s.phy_link = 1; gmx_rx_int_en.s.phy_spd = 1; diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 34afc16bc493..e14a1bb04361 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -303,6 +303,7 @@ static int cvm_oct_napi_poll(struct napi_struct *napi, int budget) if (backlog > budget * cores_in_use && napi != NULL) cvm_oct_enable_one_cpu(); } + rx_count++; skb_in_hw = USE_SKBUFFS_IN_HW && work->word2.s.bufs == 1; if (likely(skb_in_hw)) { @@ -336,9 +337,6 @@ static int cvm_oct_napi_poll(struct napi_struct *napi, int budget) */ skb = dev_alloc_skb(work->len); if (!skb) { - printk_ratelimited("Port %d failed to allocate " - "skbuff, packet dropped\n", - work->ipprt); cvm_oct_free_work(work); continue; } @@ -429,7 +427,6 @@ static int cvm_oct_napi_poll(struct napi_struct *napi, int budget) #endif } netif_receive_skb(skb); - rx_count++; } else { /* Drop any packet received for a device that isn't up */ /* diff --git a/drivers/target/Makefile b/drivers/target/Makefile index 9fdcb561422f..85b012d2f89b 100644 --- a/drivers/target/Makefile +++ b/drivers/target/Makefile @@ -13,7 +13,8 @@ target_core_mod-y := target_core_configfs.o \ target_core_spc.o \ target_core_ua.o \ target_core_rd.o \ - target_core_stat.o + target_core_stat.o \ + target_core_xcopy.o obj-$(CONFIG_TARGET_CORE) += target_core_mod.o diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3a179302b904..35b61f7d6c63 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to the iSCSI Target Core Driver. * - * © Copyright 2007-2011 RisingTide Systems LLC.
- * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -63,7 +61,6 @@ spinlock_t sess_idr_lock; struct iscsit_global *iscsit_global; -struct kmem_cache *lio_cmd_cache; struct kmem_cache *lio_qr_cache; struct kmem_cache *lio_dr_cache; struct kmem_cache *lio_ooo_cache; @@ -220,11 +217,6 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) spin_unlock_bh(&np->np_thread_lock); return -1; } - if (np->np_login_tpg) { - pr_err("np->np_login_tpg() is not NULL!\n"); - spin_unlock_bh(&np->np_thread_lock); - return -1; - } spin_unlock_bh(&np->np_thread_lock); /* * Determine if the portal group is accepting storage traffic. @@ -239,26 +231,38 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) /* * Here we serialize access across the TIQN+TPG Tuple. */ - ret = mutex_lock_interruptible(&tpg->np_login_lock); + ret = down_interruptible(&tpg->np_login_sem); if ((ret != 0) || signal_pending(current)) return -1; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = tpg; - spin_unlock_bh(&np->np_thread_lock); + spin_lock_bh(&tpg->tpg_state_lock); + if (tpg->tpg_state != TPG_STATE_ACTIVE) { + spin_unlock_bh(&tpg->tpg_state_lock); + up(&tpg->np_login_sem); + return -1; + } + spin_unlock_bh(&tpg->tpg_state_lock); return 0; } -int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +void iscsit_login_kref_put(struct kref *kref) +{ + struct iscsi_tpg_np *tpg_np = container_of(kref, + struct iscsi_tpg_np, tpg_np_kref); + + complete(&tpg_np->tpg_np_comp); +} + +int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg, + struct iscsi_tpg_np *tpg_np) { struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = NULL; - spin_unlock_bh(&np->np_thread_lock); + up(&tpg->np_login_sem); - mutex_unlock(&tpg->np_login_lock); + if 
(tpg_np) + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); if (tiqn) iscsit_put_tiqn_for_login(tiqn); @@ -410,20 +414,10 @@ struct iscsi_np *iscsit_add_np( int iscsit_reset_np_thread( struct iscsi_np *np, struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { spin_lock_bh(&np->np_thread_lock); - if (tpg && tpg_np) { - /* - * The reset operation need only be performed when the - * passed struct iscsi_portal_group has a login in progress - * to one of the network portals. - */ - if (tpg_np->tpg_np->np_login_tpg != tpg) { - spin_unlock_bh(&np->np_thread_lock); - return 0; - } - } if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) { spin_unlock_bh(&np->np_thread_lock); return 0; @@ -438,6 +432,12 @@ int iscsit_reset_np_thread( } spin_unlock_bh(&np->np_thread_lock); + if (tpg_np && shutdown) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + + wait_for_completion(&tpg_np->tpg_np_comp); + } + return 0; } @@ -497,7 +497,6 @@ static struct iscsit_transport iscsi_target_transport = { .iscsit_setup_np = iscsit_setup_np, .iscsit_accept_np = iscsit_accept_np, .iscsit_free_np = iscsit_free_np, - .iscsit_alloc_cmd = iscsit_alloc_cmd, .iscsit_get_login_rx = iscsit_get_login_rx, .iscsit_put_login_tx = iscsit_put_login_tx, .iscsit_get_dataout = iscsit_build_r2ts_for_cmd, @@ -538,22 +537,13 @@ static int __init iscsi_target_init_module(void) goto ts_out1; } - lio_cmd_cache = kmem_cache_create("lio_cmd_cache", - sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), - 0, NULL); - if (!lio_cmd_cache) { - pr_err("Unable to kmem_cache_create() for" - " lio_cmd_cache\n"); - goto ts_out2; - } - lio_qr_cache = kmem_cache_create("lio_qr_cache", sizeof(struct iscsi_queue_req), __alignof__(struct iscsi_queue_req), 0, NULL); if (!lio_qr_cache) { pr_err("nable to kmem_cache_create() for" " lio_qr_cache\n"); - goto cmd_out; + goto ts_out2; } lio_dr_cache = kmem_cache_create("lio_dr_cache", @@ -597,8 +587,6 @@ 
dr_out: kmem_cache_destroy(lio_dr_cache); qr_out: kmem_cache_destroy(lio_qr_cache); -cmd_out: - kmem_cache_destroy(lio_cmd_cache); ts_out2: iscsi_deallocate_thread_sets(); ts_out1: @@ -616,7 +604,6 @@ static void __exit iscsi_target_cleanup_module(void) iscsi_thread_set_free(); iscsit_release_discovery_tpg(); iscsit_unregister_transport(&iscsi_target_transport); - kmem_cache_destroy(lio_cmd_cache); kmem_cache_destroy(lio_qr_cache); kmem_cache_destroy(lio_dr_cache); kmem_cache_destroy(lio_ooo_cache); @@ -3447,12 +3434,10 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) bool inaddr_any = iscsit_check_inaddr_any(np); len = sprintf(buf, "TargetAddress=" - "%s%s%s:%hu,%hu", - (np->np_sockaddr.ss_family == AF_INET6) ? - "[" : "", (inaddr_any == false) ? + "%s:%hu,%hu", + (inaddr_any == false) ? np->np_ip : conn->local_ip, - (np->np_sockaddr.ss_family == AF_INET6) ? - "]" : "", (inaddr_any == false) ? + (inaddr_any == false) ? np->np_port : conn->local_port, tpg->tpgt); len += 1; diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 2c437cb8ca00..e936d56fb523 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -7,13 +7,15 @@ extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *); extern void iscsit_del_tiqn(struct iscsi_tiqn *); extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); -extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *); +extern void iscsit_login_kref_put(struct kref *); +extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, + struct iscsi_tpg_np *); extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, char *, int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, - struct 
iscsi_portal_group *); + struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); @@ -37,7 +39,6 @@ extern struct target_fabric_configfs *lio_target_fabric_configfs; extern struct kmem_cache *lio_dr_cache; extern struct kmem_cache *lio_ooo_cache; -extern struct kmem_cache *lio_cmd_cache; extern struct kmem_cache *lio_qr_cache; extern struct kmem_cache *lio_r2t_cache; diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index cee17543278c..7505fddca15f 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file houses the main functions for the iSCSI CHAP support * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bbfd28893164..fd145259361d 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -2,9 +2,7 @@ * This file contains the configfs implementation for iSCSI Target mode * from the LIO-Target Project. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A.
Bellinger <nab@linux-iscsi.org> * @@ -265,9 +263,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in6 = (struct sockaddr_in6 *)&sockaddr; @@ -290,9 +288,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in = (struct sockaddr_in *)&sockaddr; @@ -1481,7 +1479,7 @@ static ssize_t lio_target_wwn_show_attr_lio_version( struct target_fabric_configfs *tf, char *page) { - return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n"); + return sprintf(page, "Datera Inc. 
iSCSI Target "ISCSIT_VERSION"\n"); } TF_WWN_ATTR_RO(lio_target, lio_version); @@ -1925,7 +1923,7 @@ static void lio_release_cmd(struct se_cmd *se_cmd) struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); pr_debug("Entering lio_release_cmd for se_cmd: %p\n", se_cmd); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); } /* End functions for target_core_fabric_ops */ diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 4f77a78edef9..9a5721b8ff96 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -9,7 +9,7 @@ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> -#define ISCSIT_VERSION "v4.1.0-rc2" +#define ISCSIT_VERSION "v4.1.0" #define ISCSI_MAX_DATASN_MISSING_COUNT 16 #define ISCSI_TX_THREAD_TCP_TIMEOUT 2 #define ISCSI_RX_THREAD_TCP_TIMEOUT 2 @@ -17,6 +17,9 @@ #define SECONDS_FOR_ASYNC_TEXT 10 #define SECONDS_FOR_LOGOUT_COMP 15 #define WHITE_SPACE " \t\v\f\n\r" +#define ISCSIT_MIN_TAGS 16 +#define ISCSIT_EXTRA_TAGS 8 +#define ISCSIT_TCP_BACKLOG 256 /* struct iscsi_node_attrib sanity values */ #define NA_DATAOUT_TIMEOUT 3 @@ -47,7 +50,7 @@ #define TA_NETIF_TIMEOUT_MAX 15 #define TA_NETIF_TIMEOUT_MIN 2 #define TA_GENERATE_NODE_ACLS 0 -#define TA_DEFAULT_CMDSN_DEPTH 16 +#define TA_DEFAULT_CMDSN_DEPTH 64 #define TA_DEFAULT_CMDSN_DEPTH_MAX 512 #define TA_DEFAULT_CMDSN_DEPTH_MIN 1 #define TA_CACHE_DYNAMIC_ACLS 0 @@ -489,7 +492,6 @@ struct iscsi_cmd { u32 first_data_sg_off; u32 kmapped_nents; sense_reason_t sense_reason; - void (*release_cmd)(struct iscsi_cmd *); } ____cacheline_aligned; struct iscsi_tmr_req { @@ -554,9 +556,19 @@ struct iscsi_conn { struct completion rx_half_close_comp; /* socket used by this connection */ struct socket *sock; + void (*orig_data_ready)(struct sock *, int); + void (*orig_state_change)(struct sock *); +#define LOGIN_FLAGS_READ_ACTIVE 1 +#define LOGIN_FLAGS_CLOSED 2 +#define LOGIN_FLAGS_READY 4 + unsigned 
long login_flags; + struct delayed_work login_work; + struct delayed_work login_cleanup_work; + struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; struct timer_list transport_timer; + struct task_struct *login_kworker; /* Spinlock used for add/deleting cmd's from conn_cmd_list */ spinlock_t cmd_lock; spinlock_t conn_usage_lock; @@ -584,6 +596,7 @@ struct iscsi_conn { void *context; struct iscsi_login_thread_s *login_thread; struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; /* Pointer to parent session */ struct iscsi_session *sess; /* Pointer to thread_set in use for this conn's threads */ @@ -682,6 +695,7 @@ struct iscsi_login { u8 version_max; u8 login_complete; u8 login_failed; + bool zero_tsih; char isid[6]; u32 cmd_sn; itt_t init_task_tag; @@ -694,6 +708,7 @@ struct iscsi_login { char *req_buf; char *rsp_buf; struct iscsi_conn *conn; + struct iscsi_np *np; } ____cacheline_aligned; struct iscsi_node_attrib { @@ -773,7 +788,6 @@ struct iscsi_np { struct __kernel_sockaddr_storage np_sockaddr; struct task_struct *np_thread; struct timer_list np_login_timer; - struct iscsi_portal_group *np_login_tpg; void *np_context; struct iscsit_transport *np_transport; struct list_head np_list; @@ -788,6 +802,8 @@ struct iscsi_tpg_np { struct list_head tpg_np_parent_list; struct se_tpg_np se_tpg_np; spinlock_t tpg_np_parent_lock; + struct completion tpg_np_comp; + struct kref tpg_np_kref; }; struct iscsi_portal_group { @@ -809,7 +825,7 @@ struct iscsi_portal_group { spinlock_t tpg_state_lock; struct se_portal_group tpg_se_tpg; struct mutex tpg_access_lock; - struct mutex np_login_lock; + struct semaphore np_login_sem; struct iscsi_tpg_attrib tpg_attrib; struct iscsi_node_auth tpg_demo_auth; /* Pointer to default list of iSCSI parameters for TPG */ diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c index 848fee768948..e93d5a7a3f81 100644 --- 
a/drivers/target/iscsi/iscsi_target_datain_values.c +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target DataIN value generation functions. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 1b74033510a0..6c7a5104a4cd 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -2,9 +2,7 @@ * This file contains the iSCSI Virtual Device and Disk Transport * agnostic related functions. * - © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 08bd87833321..41052e512d92 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -2,9 +2,7 @@ * This file contains error recovery level zero functions used by * the iSCSI Target driver. * - * © Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A.
Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 586c268679a4..e048d6439f4a 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains error recovery level one used by the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 45a5afd5ea13..33be1fb1df32 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -2,9 +2,7 @@ * This file contains error recovery level two functions used by * the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bc788c52b6cc..1794c753954a 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the login functions used by the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -50,6 +48,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) pr_err("Unable to allocate memory for struct iscsi_login.\n"); return NULL; } + conn->login = login; login->conn = conn; login->first_request = 1; @@ -428,7 +427,7 @@ static int iscsi_login_zero_tsih_s2( ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; } - rc = strict_strtoul(param->value, 0, &mrdsl); + rc = kstrtoul(param->value, 0, &mrdsl); if (rc < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -684,7 +683,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsi_post_login_handler( +int iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -872,7 +871,7 @@ int iscsit_setup_np( struct __kernel_sockaddr_storage *sockaddr) { struct socket *sock = NULL; - int backlog = 5, ret, opt = 0, len; + int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len; switch (np->np_network_transport) { case ISCSI_TCP: @@ -1007,16 +1006,24 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 1); if (!rc) { - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->login_port = ntohs(sock_in6.sin6_port); } rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 0); if (!rc) { - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + 
snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->local_port = ntohs(sock_in6.sin6_port); } } else { @@ -1116,6 +1123,77 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t) return 0; } +void iscsi_target_login_sess_out(struct iscsi_conn *conn, + struct iscsi_np *np, bool zero_tsih, bool new_sess) +{ + if (new_sess == false) + goto old_sess_out; + + pr_err("iSCSI Login negotiation failed.\n"); + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + if (!zero_tsih || !conn->sess) + goto old_sess_out; + if (conn->sess->se_sess) + transport_free_session(conn->sess->se_sess); + if (conn->sess->session_index != 0) { + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + } + kfree(conn->sess->sess_ops); + kfree(conn->sess); + +old_sess_out: + iscsi_stop_login_thread_timer(np); + /* + * If login negotiation fails check if the Time2Retain timer + * needs to be restarted. 
+ */ + if (!zero_tsih && conn->sess) { + spin_lock_bh(&conn->sess->conn_lock); + if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { + struct se_portal_group *se_tpg = + &ISCSI_TPG_C(conn)->tpg_se_tpg; + + atomic_set(&conn->sess->session_continuation, 0); + spin_unlock_bh(&conn->sess->conn_lock); + spin_lock_bh(&se_tpg->session_lock); + iscsit_start_time2retain_handler(conn->sess); + spin_unlock_bh(&se_tpg->session_lock); + } else + spin_unlock_bh(&conn->sess->conn_lock); + iscsit_dec_session_usage_count(conn->sess); + } + + if (!IS_ERR(conn->conn_rx_hash.tfm)) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (!IS_ERR(conn->conn_tx_hash.tfm)) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + + if (conn->param_list) { + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } + iscsi_target_nego_release(conn); + + if (conn->sock) { + sock_release(conn->sock); + conn->sock = NULL; + } + + if (conn->conn_transport->iscsit_free_conn) + conn->conn_transport->iscsit_free_conn(conn); + + iscsit_put_transport(conn->conn_transport); + kfree(conn); +} + static int __iscsi_target_login_thread(struct iscsi_np *np) { u8 *buffer, zero_tsih = 0; @@ -1124,6 +1202,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) struct iscsi_login *login; struct iscsi_portal_group *tpg = NULL; struct iscsi_login_req *pdu; + struct iscsi_tpg_np *tpg_np; + bool new_sess = false; flush_signals(current); @@ -1264,6 +1344,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) tpg = conn->tpg; goto new_sess_out; } + login->zero_tsih = zero_tsih; tpg = conn->tpg; if (!tpg) { @@ -1279,7 +1360,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto old_sess_out; } - if (iscsi_target_start_negotiation(login, conn) < 0) + ret = iscsi_target_start_negotiation(login, conn); + if (ret < 0) goto new_sess_out; if (!conn->sess) { @@ -1292,84 +1374,32 @@ static 
int __iscsi_target_login_thread(struct iscsi_np *np) if (signal_pending(current)) goto new_sess_out; - ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret == 1) { + tpg_np = conn->tpg_np; - if (ret < 0) - goto new_sess_out; + ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret < 0) + goto new_sess_out; + + iscsit_deaccess_np(np, tpg, tpg_np); + } - iscsit_deaccess_np(np, tpg); tpg = NULL; + tpg_np = NULL; /* Get another socket */ return 1; new_sess_out: - pr_err("iSCSI Login negotiation failed.\n"); - iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, - ISCSI_LOGIN_STATUS_INIT_ERR); - if (!zero_tsih || !conn->sess) - goto old_sess_out; - if (conn->sess->se_sess) - transport_free_session(conn->sess->se_sess); - if (conn->sess->session_index != 0) { - spin_lock_bh(&sess_idr_lock); - idr_remove(&sess_idr, conn->sess->session_index); - spin_unlock_bh(&sess_idr_lock); - } - kfree(conn->sess->sess_ops); - kfree(conn->sess); + new_sess = true; old_sess_out: - iscsi_stop_login_thread_timer(np); - /* - * If login negotiation fails check if the Time2Retain timer - * needs to be restarted. 
- */ - if (!zero_tsih && conn->sess) { - spin_lock_bh(&conn->sess->conn_lock); - if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { - struct se_portal_group *se_tpg = - &ISCSI_TPG_C(conn)->tpg_se_tpg; - - atomic_set(&conn->sess->session_continuation, 0); - spin_unlock_bh(&conn->sess->conn_lock); - spin_lock_bh(&se_tpg->session_lock); - iscsit_start_time2retain_handler(conn->sess); - spin_unlock_bh(&se_tpg->session_lock); - } else - spin_unlock_bh(&conn->sess->conn_lock); - iscsit_dec_session_usage_count(conn->sess); - } - - if (!IS_ERR(conn->conn_rx_hash.tfm)) - crypto_free_hash(conn->conn_rx_hash.tfm); - if (!IS_ERR(conn->conn_tx_hash.tfm)) - crypto_free_hash(conn->conn_tx_hash.tfm); - - if (conn->conn_cpumask) - free_cpumask_var(conn->conn_cpumask); - - kfree(conn->conn_ops); - - if (conn->param_list) { - iscsi_release_param_list(conn->param_list); - conn->param_list = NULL; - } - iscsi_target_nego_release(conn); - - if (conn->sock) { - sock_release(conn->sock); - conn->sock = NULL; - } - - if (conn->conn_transport->iscsit_free_conn) - conn->conn_transport->iscsit_free_conn(conn); - - iscsit_put_transport(conn->conn_transport); - - kfree(conn); + tpg_np = conn->tpg_np; + iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess); + new_sess = false; if (tpg) { - iscsit_deaccess_np(np, tpg); + iscsit_deaccess_np(np, tpg, tpg_np); tpg = NULL; + tpg_np = NULL; } out: diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 63efd2878451..29d098324b7f 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,6 +12,9 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); +extern int iscsi_post_login_handler(struct iscsi_np *, 
struct iscsi_conn *, u8); +extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, + bool, bool); extern int iscsi_target_login_thread(void *); extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index c4675b4ceb49..14d1aed5af1d 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -377,15 +375,284 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log return 0; } -static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +static void iscsi_target_sk_data_ready(struct sock *sk, int count) { - if (iscsi_target_do_tx_login_io(conn, login) < 0) - return -1; + struct iscsi_conn *conn = sk->sk_user_data; + bool rc; - if (conn->conn_transport->iscsit_get_login_rx(conn, login) < 0) - return -1; + pr_debug("Entering iscsi_target_sk_data_ready: conn: %p\n", conn); - return 0; + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READY=0, conn: %p >>>>\n", conn); + return; + } + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_CLOSED=1, conn: %p >>>>\n", conn); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, 
&conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn); + return; + } + + rc = schedule_delayed_work(&conn->login_work, 0); + if (rc == false) { + pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work" + " got false\n"); + } + write_unlock_bh(&sk->sk_callback_lock); +} + +static void iscsi_target_sk_state_change(struct sock *); + +static void iscsi_target_set_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_set_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = conn; + conn->orig_data_ready = sk->sk_data_ready; + conn->orig_state_change = sk->sk_state_change; + sk->sk_data_ready = iscsi_target_sk_data_ready; + sk->sk_state_change = iscsi_target_sk_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = TA_LOGIN_TIMEOUT * HZ; + sk->sk_rcvtimeo = TA_LOGIN_TIMEOUT * HZ; +} + +static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_restore_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + sk->sk_user_data = NULL; + sk->sk_data_ready = conn->orig_data_ready; + sk->sk_state_change = conn->orig_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; +} + +static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); + +static bool iscsi_target_sk_state_check(struct sock *sk) +{ + if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { + pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + "returning FALSE\n"); + return false; + } + return true; +} + +static 
void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) +{ + struct iscsi_np *np = login->np; + bool zero_tsih = login->zero_tsih; + + iscsi_remove_failed_auth_entry(conn); + iscsi_target_nego_release(conn); + iscsi_target_login_sess_out(conn, np, zero_tsih, true); +} + +static void iscsi_target_login_timeout(unsigned long data) +{ + struct iscsi_conn *conn = (struct iscsi_conn *)data; + + pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); + + if (conn->login_kworker) { + pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", + conn->login_kworker->comm, conn->login_kworker->pid); + send_sig(SIGINT, conn->login_kworker, 1); + } +} + +static void iscsi_target_do_login_rx(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_work.work); + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + struct timer_list login_timer; + int rc, zero_tsih = login->zero_tsih; + bool state; + + pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", + conn, current->comm, current->pid); + + spin_lock(&tpg->tpg_state_lock); + state = (tpg->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&tpg->tpg_state_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + } + + conn->login_kworker 
= current; + allow_signal(SIGINT); + + init_timer(&login_timer); + login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ); + login_timer.data = (unsigned long)conn; + login_timer.function = iscsi_target_login_timeout; + add_timer(&login_timer); + pr_debug("Starting login_timer for %s/%d\n", current->comm, current->pid); + + rc = conn->conn_transport->iscsit_get_login_rx(conn, login); + del_timer_sync(&login_timer); + flush_signals(current); + conn->login_kworker = NULL; + + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", + conn, current->comm, current->pid); + + rc = iscsi_target_do_login(conn, login); + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + } else if (!rc) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (rc == 1) { + iscsi_target_nego_release(conn); + iscsi_post_login_handler(np, conn, zero_tsih); + iscsit_deaccess_np(np, tpg, tpg_np); + } +} + +static void iscsi_target_do_cleanup(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_cleanup_work.work); + struct sock *sk = conn->sock->sk; + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + + pr_debug("Entering iscsi_target_do_cleanup\n"); + + cancel_delayed_work_sync(&conn->login_work); + conn->orig_state_change(sk); + + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + + pr_debug("iscsi_target_do_cleanup done()\n"); +} + 
+static void iscsi_target_sk_state_change(struct sock *sk) +{ + struct iscsi_conn *conn; + void (*orig_state_change)(struct sock *); + bool state; + + pr_debug("Entering iscsi_target_sk_state_change\n"); + + write_lock_bh(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (!conn) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + orig_state_change = conn->orig_state_change; + + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READY=0 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" + " conn: %p\n", conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + + state = iscsi_target_sk_state_check(sk); + write_unlock_bh(&sk->sk_callback_lock); + + pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + + if (!state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + schedule_delayed_work(&conn->login_cleanup_work, 0); + return; + } + orig_state_change(sk); } /* @@ -643,10 +910,11 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { login->tsih = conn->sess->tsih; login->login_complete = 1; + iscsi_target_restore_sock_callbacks(conn); if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; - return 0; + return 1; } break; default: @@ -656,13 +924,29 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (iscsi_target_do_login_io(conn, login) < 0) + if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; if (login_rsp->flags & 
ISCSI_FLAG_LOGIN_TRANSIT) { login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT; login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK; } + break; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + bool state; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (!state) { + pr_debug("iscsi_target_do_login() failed state for" + " conn: %p\n", conn); + return -1; + } } return 0; @@ -695,9 +979,17 @@ int iscsi_target_locate_portal( char *tmpbuf, *start = NULL, *end = NULL, *key, *value; struct iscsi_session *sess = conn->sess; struct iscsi_tiqn *tiqn; + struct iscsi_tpg_np *tpg_np = NULL; struct iscsi_login_req *login_req; - u32 payload_length; - int sessiontype = 0, ret = 0; + struct se_node_acl *se_nacl; + u32 payload_length, queue_depth = 0; + int sessiontype = 0, ret = 0, tag_num, tag_size; + + INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx); + INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup); + iscsi_target_set_sock_callbacks(conn); + + login->np = np; login_req = (struct iscsi_login_req *) login->req; payload_length = ntoh24(login_req->dlength); @@ -791,7 +1083,7 @@ int iscsi_target_locate_portal( goto out; } ret = 0; - goto out; + goto alloc_tags; } get_target: @@ -822,7 +1114,7 @@ get_target: /* * Locate Target Portal Group from Storage Node. 
*/ - conn->tpg = iscsit_get_tpg_from_np(tiqn, np); + conn->tpg = iscsit_get_tpg_from_np(tiqn, np, &tpg_np); if (!conn->tpg) { pr_err("Unable to locate Target Portal Group" " on %s\n", tiqn->tiqn); @@ -832,12 +1124,16 @@ get_target: ret = -1; goto out; } + conn->tpg_np = tpg_np; pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); /* * Setup crc32c modules from libcrypto */ if (iscsi_login_setup_crypto(conn) < 0) { pr_err("iscsi_login_setup_crypto() failed\n"); + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + iscsit_put_tiqn_for_login(tiqn); + conn->tpg = NULL; ret = -1; goto out; } @@ -846,11 +1142,12 @@ get_target: * process login attempt. */ if (iscsit_access_np(np, conn->tpg) < 0) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); iscsit_put_tiqn_for_login(tiqn); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); - ret = -1; conn->tpg = NULL; + ret = -1; goto out; } @@ -883,8 +1180,27 @@ get_target: ret = -1; goto out; } + se_nacl = sess->se_sess->se_node_acl; + queue_depth = se_nacl->queue_depth; + /* + * Setup pre-allocated tags based upon allowed per NodeACL CmdSN + * depth for non immediate commands, plus extra tags for immediate + * commands. + * + * Also enforce a ISCSIT_MIN_TAGS to prevent unnecessary contention + * in per-cpu-ida tag allocation logic + small queue_depth. 
+ */ +alloc_tags: + tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth); + tag_num += ISCSIT_EXTRA_TAGS; + tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; - ret = 0; + ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size); + if (ret < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + ret = -1; + } out: kfree(tmpbuf); return ret; @@ -897,10 +1213,23 @@ int iscsi_target_start_negotiation( int ret; ret = iscsi_target_do_login(conn, login); - if (ret != 0) + if (!ret) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (ret < 0) { + cancel_delayed_work_sync(&conn->login_work); + cancel_delayed_work_sync(&conn->login_cleanup_work); + iscsi_target_restore_sock_callbacks(conn); iscsi_remove_failed_auth_entry(conn); + } + if (ret != 0) + iscsi_target_nego_release(conn); - iscsi_target_nego_release(conn); return ret; } diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c index 11dc2936af76..93bdc475eb00 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.c +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the main functions related to Initiator Node Attributes. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 35fd6439eb01..4d2e23fc76fd 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -1182,7 +1180,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, unsigned long long tmp; int rc; - rc = strict_strtoull(param->value, 0, &tmp); + rc = kstrtoull(param->value, 0, &tmp); if (rc < 0) return -1; diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c index edb592a368ef..ca41b583f2f6 100644 --- a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c @@ -2,9 +2,7 @@ * This file contains main functions related to iSCSI DataSequenceInOrder=No * and DataPDUInOrder=No. * - \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index 464b4206a51e..f788e8b5e855 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -2,9 +2,7 @@ * Modern ConfigFS group context specific iSCSI statistics based on original * iscsi_target_mib.c code * - * Copyright (c) 2011 Rising Tide Systems - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * Copyright (c) 2011-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -177,7 +175,7 @@ ISCSI_STAT_INSTANCE_ATTR_RO(description); static ssize_t iscsi_stat_instance_show_attr_vendor( struct iscsi_wwn_stat_grps *igrps, char *page) { - return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n"); + return snprintf(page, PAGE_SIZE, "Datera, Inc. iSCSI-Target\n"); } ISCSI_STAT_INSTANCE_ATTR_RO(vendor); @@ -432,13 +430,7 @@ static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr( int ret; spin_lock(&lstat->lock); - if (lstat->last_intr_fail_ip_family == AF_INET6) { - ret = snprintf(page, PAGE_SIZE, "[%s]\n", - lstat->last_intr_fail_ip_addr); - } else { - ret = snprintf(page, PAGE_SIZE, "%s\n", - lstat->last_intr_fail_ip_addr); - } + ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr); spin_unlock(&lstat->lock); return ret; diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index b997e5da47d3..78404b1cc0bf 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific Task Management functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. 
* * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 439260b7d87f..4faeb47fa5e1 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains iSCSI Target Portal Group related functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -49,7 +47,7 @@ struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u1 INIT_LIST_HEAD(&tpg->tpg_gnp_list); INIT_LIST_HEAD(&tpg->tpg_list); mutex_init(&tpg->tpg_access_lock); - mutex_init(&tpg->np_login_lock); + sema_init(&tpg->np_login_sem, 1); spin_lock_init(&tpg->tpg_state_lock); spin_lock_init(&tpg->tpg_np_lock); @@ -129,7 +127,8 @@ void iscsit_release_discovery_tpg(void) struct iscsi_portal_group *iscsit_get_tpg_from_np( struct iscsi_tiqn *tiqn, - struct iscsi_np *np) + struct iscsi_np *np, + struct iscsi_tpg_np **tpg_np_out) { struct iscsi_portal_group *tpg = NULL; struct iscsi_tpg_np *tpg_np; @@ -147,6 +146,8 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( spin_lock(&tpg->tpg_np_lock); list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { if (tpg_np->tpg_np == np) { + *tpg_np_out = tpg_np; + kref_get(&tpg_np->tpg_np_kref); spin_unlock(&tpg->tpg_np_lock); spin_unlock(&tiqn->tiqn_tpg_lock); return tpg; @@ -175,18 +176,20 @@ void iscsit_put_tpg(struct iscsi_portal_group *tpg) static void iscsit_clear_tpg_np_login_thread( struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { if (!tpg_np->tpg_np) { pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); return; } - 
iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg); + iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown); } void iscsit_clear_tpg_np_login_threads( - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { struct iscsi_tpg_np *tpg_np; @@ -197,7 +200,7 @@ void iscsit_clear_tpg_np_login_threads( continue; } spin_unlock(&tpg->tpg_np_lock); - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, shutdown); spin_lock(&tpg->tpg_np_lock); } spin_unlock(&tpg->tpg_np_lock); @@ -268,6 +271,8 @@ int iscsit_tpg_del_portal_group( tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); + iscsit_clear_tpg_np_login_threads(tpg, true); + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { pr_err("Unable to delete iSCSI Target Portal Group:" " %hu while active sessions exist, and force=0\n", @@ -368,7 +373,7 @@ int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force) tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); - iscsit_clear_tpg_np_login_threads(tpg); + iscsit_clear_tpg_np_login_threads(tpg, false); if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { spin_lock(&tpg->tpg_state_lock); @@ -490,6 +495,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( INIT_LIST_HEAD(&tpg_np->tpg_np_child_list); INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list); spin_lock_init(&tpg_np->tpg_np_parent_lock); + init_completion(&tpg_np->tpg_np_comp); + kref_init(&tpg_np->tpg_np_kref); tpg_np->tpg_np = np; tpg_np->tpg = tpg; @@ -520,7 +527,7 @@ static int iscsit_tpg_release_np( struct iscsi_portal_group *tpg, struct iscsi_np *np) { - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 
dda48c141a8c..b77693e2c209 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -5,10 +5,10 @@ extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, extern int iscsit_load_discovery_tpg(void); extern void iscsit_release_discovery_tpg(void); extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, - struct iscsi_np *); + struct iscsi_np *, struct iscsi_tpg_np **); extern int iscsit_get_tpg(struct iscsi_portal_group *); extern void iscsit_put_tpg(struct iscsi_portal_group *); -extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *); +extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool); extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c index 81289520f96b..601e9cc61e98 100644 --- a/drivers/target/iscsi/iscsi_target_tq.c +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Login Thread and Thread Queue functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -105,12 +103,11 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) ts->status = ISCSI_THREAD_SET_FREE; INIT_LIST_HEAD(&ts->ts_list); spin_lock_init(&ts->ts_state_lock); - init_completion(&ts->rx_post_start_comp); - init_completion(&ts->tx_post_start_comp); init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); init_completion(&ts->rx_start_comp); init_completion(&ts->tx_start_comp); + sema_init(&ts->ts_activate_sem, 0); ts->create_threads = 1; ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s", @@ -139,35 +136,44 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) return allocated_thread_pair_count; } -void iscsi_deallocate_thread_sets(void) +static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts) { - u32 released_count = 0; - struct iscsi_thread_set *ts = NULL; - - while ((ts = iscsi_get_ts_from_inactive_list())) { + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_DIE; + if (ts->rx_thread) { + complete(&ts->rx_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->rx_thread); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; + } + if (ts->tx_thread) { + complete(&ts->tx_start_comp); spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->tx_thread); + spin_lock_bh(&ts->ts_state_lock); + } + spin_unlock_bh(&ts->ts_state_lock); + /* + * Release this thread_id in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, + ts->thread_id, get_order(1)); + spin_unlock(&ts_bitmap_lock); - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - spin_unlock(&ts_bitmap_lock); + 
kfree(ts); +} +void iscsi_deallocate_thread_sets(void) +{ + struct iscsi_thread_set *ts = NULL; + u32 released_count = 0; + + while ((ts = iscsi_get_ts_from_inactive_list())) { + + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } if (released_count) @@ -187,34 +193,13 @@ static void iscsi_deallocate_extra_thread_sets(void) if (!ts) break; - spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; - spin_unlock_bh(&ts->ts_state_lock); - - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - spin_unlock(&ts_bitmap_lock); - + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } - if (released_count) { + if (released_count) pr_debug("Stopped %d thread set(s) (%d total threads)." "\n", released_count, released_count * 2); - } } void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts) @@ -224,37 +209,23 @@ void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set spin_lock_bh(&ts->ts_state_lock); conn->thread_set = ts; ts->conn = conn; + ts->status = ISCSI_THREAD_SET_ACTIVE; spin_unlock_bh(&ts->ts_state_lock); - /* - * Start up the RX thread and wait on rx_post_start_comp. The RX - * Thread will then do the same for the TX Thread in - * iscsi_rx_thread_pre_handler(). 
- */ + complete(&ts->rx_start_comp); - wait_for_completion(&ts->rx_post_start_comp); + complete(&ts->tx_start_comp); + + down(&ts->ts_activate_sem); } struct iscsi_thread_set *iscsi_get_thread_set(void) { - int allocate_ts = 0; - struct completion comp; - struct iscsi_thread_set *ts = NULL; - /* - * If no inactive thread set is available on the first call to - * iscsi_get_ts_from_inactive_list(), sleep for a second and - * try again. If still none are available after two attempts, - * allocate a set ourselves. - */ + struct iscsi_thread_set *ts; + get_set: ts = iscsi_get_ts_from_inactive_list(); if (!ts) { - if (allocate_ts == 2) - iscsi_allocate_thread_sets(1); - - init_completion(&comp); - wait_for_completion_timeout(&comp, 1 * HZ); - - allocate_ts++; + iscsi_allocate_thread_sets(1); goto get_set; } @@ -263,6 +234,7 @@ get_set: ts->thread_count = 2; init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); + sema_init(&ts->ts_activate_sem, 0); return ts; } @@ -400,7 +372,8 @@ static void iscsi_check_to_add_additional_sets(void) static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts) { spin_lock_bh(&ts->ts_state_lock); - if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) { + if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() || + signal_pending(current)) { spin_unlock_bh(&ts->ts_state_lock); return -1; } @@ -419,7 +392,8 @@ struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -446,18 +420,19 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; + iscsi_check_to_add_additional_sets(); + + spin_lock_bh(&ts->ts_state_lock); if (!ts->conn) { pr_err("struct iscsi_thread_set->conn is NULL for" - " thread_id: %d, going back to sleep\n", ts->thread_id); - goto sleep; + " 
RX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; } - iscsi_check_to_add_additional_sets(); - /* - * The RX Thread starts up the TX Thread and sleeps. - */ ts->thread_clear |= ISCSI_CLEAR_RX_THREAD; - complete(&ts->tx_start_comp); - wait_for_completion(&ts->tx_post_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + + up(&ts->ts_activate_sem); return ts->conn; } @@ -472,7 +447,8 @@ struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -498,27 +474,20 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; - if (!ts->conn) { - pr_err("struct iscsi_thread_set->conn is NULL for " - " thread_id: %d, going back to sleep\n", - ts->thread_id); - goto sleep; - } - iscsi_check_to_add_additional_sets(); - /* - * From the TX thread, up the tx_post_start_comp that the RX Thread is - * sleeping on in iscsi_rx_thread_pre_handler(), then up the - * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on. 
- */ - ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; - complete(&ts->tx_post_start_comp); - complete(&ts->rx_post_start_comp); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_ACTIVE; + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for" + " TX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; + } + ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; spin_unlock_bh(&ts->ts_state_lock); + up(&ts->ts_activate_sem); + return ts->conn; } diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h index 547d11831282..cc1eede5ab3a 100644 --- a/drivers/target/iscsi/iscsi_target_tq.h +++ b/drivers/target/iscsi/iscsi_target_tq.h @@ -64,10 +64,6 @@ struct iscsi_thread_set { struct iscsi_conn *conn; /* used for controlling ts state accesses */ spinlock_t ts_state_lock; - /* Used for rx side post startup */ - struct completion rx_post_start_comp; - /* Used for tx side post startup */ - struct completion tx_post_start_comp; /* used for restarting thread queue */ struct completion rx_restart_comp; /* used for restarting thread queue */ @@ -82,6 +78,7 @@ struct iscsi_thread_set { struct task_struct *tx_thread; /* struct iscsi_thread_set in list list head*/ struct list_head ts_list; + struct semaphore ts_activate_sem; }; #endif /*** ISCSI_THREAD_QUEUE_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 1df06d5e4e01..f2de28e178fd 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific utility functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> * @@ -19,6 +17,7 @@ ******************************************************************************/ #include <linux/list.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_tcq.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> @@ -149,18 +148,6 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) spin_unlock_bh(&cmd->r2t_lock); } -struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) -{ - struct iscsi_cmd *cmd; - - cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask); - if (!cmd) - return NULL; - - cmd->release_cmd = &iscsit_release_cmd; - return cmd; -} - /* * May be called from software interrupt (timer) context for allocating * iSCSI NopINs. @@ -168,12 +155,15 @@ struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) { struct iscsi_cmd *cmd; + struct se_session *se_sess = conn->sess->se_sess; + int size, tag; - cmd = conn->conn_transport->iscsit_alloc_cmd(conn, gfp_mask); - if (!cmd) { - pr_err("Unable to allocate memory for struct iscsi_cmd.\n"); - return NULL; - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, gfp_mask); + size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; + cmd = (struct iscsi_cmd *)(se_sess->sess_cmd_map + (tag * size)); + memset(cmd, 0, size); + + cmd->se_cmd.map_tag = tag; cmd->conn = conn; INIT_LIST_HEAD(&cmd->i_conn_node); INIT_LIST_HEAD(&cmd->datain_list); @@ -689,6 +679,16 @@ void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) void iscsit_release_cmd(struct iscsi_cmd *cmd) { + struct iscsi_session *sess; + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (cmd->conn) + sess = cmd->conn->sess; + else + sess = cmd->sess; + + BUG_ON(!sess || !sess->se_sess); + kfree(cmd->buf_ptr); kfree(cmd->pdu_list); kfree(cmd->seq_list); @@ -696,8 +696,9 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd) kfree(cmd->iov_data); kfree(cmd->text_in_ptr); - 
kmem_cache_free(lio_cmd_cache, cmd); + percpu_ida_free(&sess->se_sess->sess_tag_pool, se_cmd->map_tag); } +EXPORT_SYMBOL(iscsit_release_cmd); static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd, bool check_queues) @@ -761,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) /* Fall-through */ default: __iscsit_free_cmd(cmd, false, shutdown); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 568ad25f25d3..0f6d69dabca1 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -3,7 +3,7 @@ * This file contains the Linux/SCSI LLD virtual SCSI initiator driver * for emulated SAS initiator ports * - * © Copyright 2011 RisingTide Systems LLC. + * © Copyright 2011-2013 Datera, Inc. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index cbe48ab41745..47244102281e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 compliant asymmetric logical unit assigntment (ALUA) * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -557,6 +557,9 @@ target_alua_state_check(struct se_cmd *cmd) * a ALUA logical unit group. */ tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; + if (!tg_pt_gp_mem) + return 0; + spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); @@ -730,7 +733,7 @@ static int core_alua_write_tpg_metadata( if (ret < 0) pr_err("Error writing ALUA metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? 
-EIO : 0; } /* @@ -1756,10 +1759,10 @@ ssize_t core_alua_store_access_type( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_access_type\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1) && (tmp != 2) && (tmp != 3)) { pr_err("Illegal value for alua_access_type:" @@ -1794,10 +1797,10 @@ ssize_t core_alua_store_nonop_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract nonop_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_NONOP_DELAY_MSECS) { pr_err("Passed nonop_delay_msecs: %lu, exceeds" @@ -1825,10 +1828,10 @@ ssize_t core_alua_store_trans_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract trans_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_TRANS_DELAY_MSECS) { pr_err("Passed trans_delay_msecs: %lu, exceeds" @@ -1856,10 +1859,10 @@ ssize_t core_alua_store_implict_trans_secs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract implict_trans_secs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) { pr_err("Passed implict_trans_secs: %lu, exceeds" @@ -1887,10 +1890,10 @@ ssize_t core_alua_store_preferred_bit( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract preferred ALUA value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for preferred ALUA: %lu\n", tmp); @@ -1922,10 +1925,10 @@ ssize_t core_alua_store_offline_bit( if (!lun->lun_sep) return -ENODEV; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to 
extract alua_tg_pt_offline value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_offline: %lu\n", @@ -1961,10 +1964,10 @@ ssize_t core_alua_store_secondary_status( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_status\n"); - return -EINVAL; + return ret; } if ((tmp != ALUA_STATUS_NONE) && (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && @@ -1994,10 +1997,10 @@ ssize_t core_alua_store_secondary_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_write_md\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_write_md:" diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index e4d22933efaf..82e81c542e43 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3,7 +3,7 @@ * * This file contains ConfigFS logic for the Generic Target Engine project. * - * (c) Copyright 2008-2012 RisingTide Systems LLC. + * (c) Copyright 2008-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -48,6 +48,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_rd.h" +#include "target_core_xcopy.h" extern struct t10_alua_lu_gp *default_lu_gp; @@ -268,7 +269,7 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -static struct configfs_subsystem *target_core_subsystem[] = { +struct configfs_subsystem *target_core_subsystem[] = { &target_core_fabrics, NULL, }; @@ -577,9 +578,9 @@ static ssize_t target_core_dev_store_attr_##_name( \ unsigned long val; \ int ret; \ \ - ret = strict_strtoul(page, 0, &val); \ + ret = kstrtoul(page, 0, &val); \ if (ret < 0) { \ - pr_err("strict_strtoul() failed with" \ + pr_err("kstrtoul() failed with" \ " ret: %d\n", ret); \ return -EINVAL; \ } \ @@ -636,6 +637,12 @@ SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR); DEF_DEV_ATTRIB(emulate_tpws); SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR); +DEF_DEV_ATTRIB(emulate_caw); +SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR); + +DEF_DEV_ATTRIB(emulate_3pc); +SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR); + DEF_DEV_ATTRIB(enforce_pr_isids); SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR); @@ -693,6 +700,8 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = { &target_core_dev_attrib_emulate_tas.attr, &target_core_dev_attrib_emulate_tpu.attr, &target_core_dev_attrib_emulate_tpws.attr, + &target_core_dev_attrib_emulate_caw.attr, + &target_core_dev_attrib_emulate_3pc.attr, &target_core_dev_attrib_enforce_pr_isids.attr, &target_core_dev_attrib_is_nonrot.attr, &target_core_dev_attrib_emulate_rest_reord.attr, @@ -1310,9 +1319,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - ret = strict_strtoull(arg_p, 0, &tmp_ll); + ret = kstrtoull(arg_p, 0, &tmp_ll); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " sa_res_key=\n"); goto out; } @@ -1836,11 +1845,11 @@ static ssize_t target_core_alua_lu_gp_store_attr_lu_gp_id( unsigned long 
lu_gp_id; int ret; - ret = strict_strtoul(page, 0, &lu_gp_id); + ret = kstrtoul(page, 0, &lu_gp_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " lu_gp_id\n", ret); - return -EINVAL; + return ret; } if (lu_gp_id > 0x0000ffff) { pr_err("ALUA lu_gp_id: %lu exceeds maximum:" @@ -2032,11 +2041,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access state from" " %s\n", page); - return -EINVAL; + return ret; } new_state = (int)tmp; @@ -2079,11 +2088,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access status" " from %s\n", page); - return -EINVAL; + return ret; } new_status = (int)tmp; @@ -2139,10 +2148,10 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_write_metadata\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { @@ -2263,11 +2272,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_tg_pt_gp_id( unsigned long tg_pt_gp_id; int ret; - ret = strict_strtoul(page, 0, &tg_pt_gp_id); + ret = kstrtoul(page, 0, &tg_pt_gp_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " tg_pt_gp_id\n", ret); - return -EINVAL; + return ret; } if (tg_pt_gp_id > 0x0000ffff) { pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:" @@ -2676,10 +2685,10 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, if (transport->pmode_enable_hba == NULL) return -EINVAL; - ret = strict_strtoul(page, 0, &mode_flag); + ret = kstrtoul(page, 0, &mode_flag); if 
(ret < 0) { pr_err("Unable to extract hba mode flag: %d\n", ret); - return -EINVAL; + return ret; } if (hba->dev_count) { @@ -2767,11 +2776,11 @@ static struct config_group *target_core_call_addhbatotarget( str++; /* Skip to start of plugin dependent ID */ } - ret = strict_strtoul(str, 0, &plugin_dep_id); + ret = kstrtoul(str, 0, &plugin_dep_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " plugin_dep_id\n", ret); - return ERR_PTR(-EINVAL); + return ERR_PTR(ret); } /* * Load up TCM subsystem plugins if they have not already been loaded. @@ -2927,6 +2936,10 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + ret = target_xcopy_setup_pt(); + if (ret < 0) + goto out; + return 0; out: @@ -2999,6 +3012,7 @@ static void __exit target_core_exit_configfs(void) core_dev_release_virtual_lun0(); rd_module_exit(); + target_xcopy_release_pt(); release_se_kmem_caches(); } diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 8f4142fe5f19..d90dbb0f1a69 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -4,7 +4,7 @@ * This file contains the TCM Virtual Device and Disk Transport * agnostic related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -47,6 +47,9 @@ #include "target_core_pr.h" #include "target_core_ua.h" +DEFINE_MUTEX(g_device_mutex); +LIST_HEAD(g_device_list); + static struct se_hba *lun0_hba; /* not static, needed by tpg.c */ struct se_device *g_lun0_dev; @@ -890,6 +893,32 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag) return 0; } +int se_dev_set_emulate_caw(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_caw = flag; + pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n", + dev, flag); + + return 0; +} + +int se_dev_set_emulate_3pc(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_3pc = flag; + pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n", + dev, flag); + + return 0; +} + int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) { if ((flag != 0) && (flag != 1)) { @@ -1393,6 +1422,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->delayed_cmd_list); INIT_LIST_HEAD(&dev->state_list); INIT_LIST_HEAD(&dev->qf_cmd_list); + INIT_LIST_HEAD(&dev->g_dev_node); spin_lock_init(&dev->stats_lock); spin_lock_init(&dev->execute_task_lock); spin_lock_init(&dev->delayed_cmd_lock); @@ -1400,6 +1430,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) spin_lock_init(&dev->se_port_lock); spin_lock_init(&dev->se_tmr_lock); spin_lock_init(&dev->qf_cmd_lock); + sema_init(&dev->caw_sem, 1); atomic_set(&dev->dev_ordered_id, 0); INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list); spin_lock_init(&dev->t10_wwn.t10_vpd_lock); @@ -1423,6 +1454,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.emulate_tas = DA_EMULATE_TAS; dev->dev_attrib.emulate_tpu = DA_EMULATE_TPU; dev->dev_attrib.emulate_tpws = 
DA_EMULATE_TPWS; + dev->dev_attrib.emulate_caw = DA_EMULATE_CAW; + dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; dev->dev_attrib.is_nonrot = DA_IS_NONROT; dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD; @@ -1510,6 +1543,11 @@ int target_configure_device(struct se_device *dev) spin_lock(&hba->device_lock); hba->dev_count++; spin_unlock(&hba->device_lock); + + mutex_lock(&g_device_mutex); + list_add_tail(&dev->g_dev_node, &g_device_list); + mutex_unlock(&g_device_mutex); + return 0; out_free_alua: @@ -1528,6 +1566,10 @@ void target_free_device(struct se_device *dev) if (dev->dev_flags & DF_CONFIGURED) { destroy_workqueue(dev->tmr_wq); + mutex_lock(&g_device_mutex); + list_del(&dev->g_dev_node); + mutex_unlock(&g_device_mutex); + spin_lock(&hba->device_lock); hba->dev_count--; spin_unlock(&hba->device_lock); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index eb56eb129563..3503996d7d10 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -4,7 +4,7 @@ * This file contains generic fabric module configfs infrastructure for * TCM v4.x code * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -189,9 +189,11 @@ static ssize_t target_fabric_mappedlun_store_write_protect( struct se_node_acl *se_nacl = lacl->se_lun_nacl; struct se_portal_group *se_tpg = se_nacl->se_tpg; unsigned long op; + int ret; - if (strict_strtoul(page, 0, &op)) - return -EINVAL; + ret = kstrtoul(page, 0, &op); + if (ret) + return ret; if ((op != 1) && (op != 0)) return -EINVAL; @@ -350,7 +352,10 @@ static struct config_group *target_fabric_make_mappedlun( * Determine the Mapped LUN value. This is what the SCSI Initiator * Port will actually see. 
*/ - if (strict_strtoul(buf + 4, 0, &mapped_lun) || mapped_lun > UINT_MAX) { + ret = kstrtoul(buf + 4, 0, &mapped_lun); + if (ret) + goto out; + if (mapped_lun > UINT_MAX) { ret = -EINVAL; goto out; } @@ -875,7 +880,10 @@ static struct config_group *target_fabric_make_lun( " \"lun_$LUN_NUMBER\"\n"); return ERR_PTR(-EINVAL); } - if (strict_strtoul(name + 4, 0, &unpacked_lun) || unpacked_lun > UINT_MAX) + errno = kstrtoul(name + 4, 0, &unpacked_lun); + if (errno) + return ERR_PTR(errno); + if (unpacked_lun > UINT_MAX) return ERR_PTR(-EINVAL); lun = core_get_lun_from_tpg(se_tpg, unpacked_lun); diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 687b0b0a4aa6..0d1cf8b4f49f 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -4,7 +4,7 @@ * This file contains generic high level protocol identifier and PR * handlers for TCM fabric modules * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b11890d85120..b662f89dedac 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -3,7 +3,7 @@ * * This file contains the Storage Engine <-> FILEIO transport specific functions * - * (c) Copyright 2005-2012 RisingTide Systems LLC. + * (c) Copyright 2005-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -547,11 +547,9 @@ fd_execute_unmap(struct se_cmd *cmd) } static sense_reason_t -fd_execute_rw(struct se_cmd *cmd) +fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; int ret = 0; @@ -635,10 +633,10 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoull(arg_p, 0, &fd_dev->fd_dev_size); + ret = kstrtoull(arg_p, 0, &fd_dev->fd_dev_size); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " fd_dev_size=\n"); goto out; } diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index d2616cd48f1e..a25051a37dd7 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -3,7 +3,7 @@ * * This file contains the TCM HBA Transport related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index aa1620abec6d..b9a3394fe479 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Linux BlockIO transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -536,10 +536,10 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoul(arg_p, 0, &tmp_readonly); + ret = kstrtoul(arg_p, 0, &tmp_readonly); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoul() failed for" + pr_err("kstrtoul() failed for" " readonly=\n"); goto out; } @@ -587,11 +587,9 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) } static sense_reason_t -iblock_execute_rw(struct se_cmd *cmd) +iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; struct iblock_req *ibr; struct bio *bio; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 18d49df4d0ac..579128abe3f5 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -33,6 +33,8 @@ int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); int se_dev_set_emulate_tas(struct se_device *, int); int se_dev_set_emulate_tpu(struct se_device *, int); int se_dev_set_emulate_tpws(struct se_device *, int); +int se_dev_set_emulate_caw(struct se_device *, int); +int se_dev_set_emulate_3pc(struct se_device *, int); int se_dev_set_enforce_pr_isids(struct se_device *, int); int se_dev_set_is_nonrot(struct se_device *, int); int se_dev_set_emulate_rest_reord(struct se_device *dev, int); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index bd78faf67c6b..d1ae4c5c3ffd 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4,7 +4,7 @@ * This file contains SPC-3 compliant persistent reservations and * legacy SPC-2 reservations with compatible reservation handling (CRH=1) * - * (c) Copyright 2009-2012 
RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1949,7 +1949,7 @@ static int __core_scsi3_write_aptpl_to_file( pr_debug("Error writing APTPL metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } /* diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e992b27aa090..551c96ca60ac 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -3,7 +3,7 @@ * * This file contains the generic target mode <-> Linux SCSI subsystem plugin. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1050,9 +1050,8 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, (data_direction == DMA_TO_DEVICE), GFP_KERNEL); - if (!req || IS_ERR(req)) { - pr_err("PSCSI: blk_get_request() failed: %ld\n", - req ? IS_ERR(req) : -ENOMEM); + if (!req) { + pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto fail; } diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 51127d15d5c5..131327ac7f5b 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Ramdisk transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -280,11 +280,9 @@ static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) } static sense_reason_t -rd_execute_rw(struct se_cmd *cmd) +rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); struct rd_dev_sg_table *table; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 8a462773d0c8..6c17295e8d7c 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1,7 +1,7 @@ /* * SCSI Block Commands (SBC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -25,6 +25,7 @@ #include <linux/ratelimit.h> #include <asm/unaligned.h> #include <scsi/scsi.h> +#include <scsi/scsi_tcq.h> #include <target/target_core_base.h> #include <target/target_core_backend.h> @@ -280,13 +281,13 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o return 0; } -static void xdreadwrite_callback(struct se_cmd *cmd) +static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd) { unsigned char *buf, *addr; struct scatterlist *sg; unsigned int offset; - int i; - int count; + sense_reason_t ret = TCM_NO_SENSE; + int i, count; /* * From sbc3r22.pdf section 5.48 XDWRITEREAD (10) command * @@ -301,7 +302,7 @@ static void xdreadwrite_callback(struct se_cmd *cmd) buf = kmalloc(cmd->data_length, GFP_KERNEL); if (!buf) { pr_err("Unable to allocate xor_callback buf\n"); - return; + return TCM_OUT_OF_RESOURCES; } /* * Copy the scatterlist WRITE buffer located at cmd->t_data_sg @@ -320,8 +321,10 @@ static void xdreadwrite_callback(struct se_cmd *cmd) offset = 0; 
for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, count) { addr = kmap_atomic(sg_page(sg)); - if (!addr) + if (!addr) { + ret = TCM_OUT_OF_RESOURCES; goto out; + } for (i = 0; i < sg->length; i++) *(addr + sg->offset + i) ^= *(buf + offset + i); @@ -332,6 +335,193 @@ static void xdreadwrite_callback(struct se_cmd *cmd) out: kfree(buf); + return ret; +} + +static sense_reason_t +sbc_execute_rw(struct se_cmd *cmd) +{ + return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, + cmd->data_direction); +} + +static sense_reason_t compare_and_write_post(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + /* + * Unlock ->caw_sem originally obtained during sbc_compare_and_write() + * before the original READ I/O submission. + */ + up(&dev->caw_sem); + + return TCM_NO_SENSE; +} + +static sense_reason_t compare_and_write_callback(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + struct scatterlist *write_sg = NULL, *sg; + unsigned char *buf, *addr; + struct sg_mapping_iter m; + unsigned int offset = 0, len; + unsigned int nlbas = cmd->t_task_nolb; + unsigned int block_size = dev->dev_attrib.block_size; + unsigned int compare_len = (nlbas * block_size); + sense_reason_t ret = TCM_NO_SENSE; + int rc, i; + + /* + * Handle early failure in transport_generic_request_failure(), + * which will not have taken ->caw_mutex yet.. + */ + if (!cmd->t_data_sg || !cmd->t_bidi_data_sg) + return TCM_NO_SENSE; + + buf = kzalloc(cmd->data_length, GFP_KERNEL); + if (!buf) { + pr_err("Unable to allocate compare_and_write buf\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + + write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents, + GFP_KERNEL); + if (!write_sg) { + pr_err("Unable to allocate compare_and_write sg\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + /* + * Setup verify and write data payloads from total NumberLBAs. 
+ */ + rc = sg_copy_to_buffer(cmd->t_data_sg, cmd->t_data_nents, buf, + cmd->data_length); + if (!rc) { + pr_err("sg_copy_to_buffer() failed for compare_and_write\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + /* + * Compare against SCSI READ payload against verify payload + */ + for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, i) { + addr = (unsigned char *)kmap_atomic(sg_page(sg)); + if (!addr) { + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + + len = min(sg->length, compare_len); + + if (memcmp(addr, buf + offset, len)) { + pr_warn("Detected MISCOMPARE for addr: %p buf: %p\n", + addr, buf + offset); + kunmap_atomic(addr); + goto miscompare; + } + kunmap_atomic(addr); + + offset += len; + compare_len -= len; + if (!compare_len) + break; + } + + i = 0; + len = cmd->t_task_nolb * block_size; + sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG); + /* + * Currently assumes NoLB=1 and SGLs are PAGE_SIZE.. + */ + while (len) { + sg_miter_next(&m); + + if (block_size < PAGE_SIZE) { + sg_set_page(&write_sg[i], m.page, block_size, + block_size); + } else { + sg_miter_next(&m); + sg_set_page(&write_sg[i], m.page, block_size, + 0); + } + len -= block_size; + i++; + } + sg_miter_stop(&m); + /* + * Save the original SGL + nents values before updating to new + * assignments, to be released in transport_free_pages() -> + * transport_reset_sgl_orig() + */ + cmd->t_data_sg_orig = cmd->t_data_sg; + cmd->t_data_sg = write_sg; + cmd->t_data_nents_orig = cmd->t_data_nents; + cmd->t_data_nents = 1; + + cmd->sam_task_attr = MSG_HEAD_TAG; + cmd->transport_complete_callback = compare_and_write_post; + /* + * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler + * for submitting the adjusted SGL to write instance user-data. 
+ */ + cmd->execute_cmd = sbc_execute_rw; + + spin_lock_irq(&cmd->t_state_lock); + cmd->t_state = TRANSPORT_PROCESSING; + cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + spin_unlock_irq(&cmd->t_state_lock); + + __target_execute_cmd(cmd); + + kfree(buf); + return ret; + +miscompare: + pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n", + dev->transport->name); + ret = TCM_MISCOMPARE_VERIFY; +out: + /* + * In the MISCOMPARE or failure case, unlock ->caw_sem obtained in + * sbc_compare_and_write() before the original READ I/O submission. + */ + up(&dev->caw_sem); + kfree(write_sg); + kfree(buf); + return ret; +} + +static sense_reason_t +sbc_compare_and_write(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + sense_reason_t ret; + int rc; + /* + * Submit the READ first for COMPARE_AND_WRITE to perform the + * comparision using SGLs at cmd->t_bidi_data_sg.. + */ + rc = down_interruptible(&dev->caw_sem); + if ((rc != 0) || signal_pending(current)) { + cmd->transport_complete_callback = NULL; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + + ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, + DMA_FROM_DEVICE); + if (ret) { + cmd->transport_complete_callback = NULL; + up(&dev->caw_sem); + return ret; + } + /* + * Unlock of dev->caw_sem to occur in compare_and_write_callback() + * upon MISCOMPARE, or in compare_and_write_done() upon completion + * of WRITE instance user-data. 
+ */ + return TCM_NO_SENSE; } sense_reason_t @@ -348,31 +538,36 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_10: sectors = transport_get_sectors_10(cdb); cmd->t_task_lba = transport_lba_32(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_12: sectors = transport_get_sectors_12(cdb); cmd->t_task_lba = transport_lba_32(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_16: sectors = transport_get_sectors_16(cdb); cmd->t_task_lba = transport_lba_64(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_10: case WRITE_VERIFY: @@ -381,7 +576,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_12: sectors = transport_get_sectors_12(cdb); @@ -389,7 +585,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + 
cmd->execute_cmd = sbc_execute_rw; break; case WRITE_16: sectors = transport_get_sectors_16(cdb); @@ -397,7 +594,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case XDWRITEREAD_10: if (cmd->data_direction != DMA_TO_DEVICE || @@ -411,7 +609,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) /* * Setup BIDI XOR callback to be run after I/O completion. */ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -434,7 +633,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Setup BIDI XOR callback to be run during after I/O * completion. */ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -461,6 +661,28 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) } break; } + case COMPARE_AND_WRITE: + sectors = cdb[13]; + /* + * Currently enforce COMPARE_AND_WRITE for a single sector + */ + if (sectors > 1) { + pr_err("COMPARE_AND_WRITE contains NoLB: %u greater" + " than 1\n", sectors); + return TCM_INVALID_CDB_FIELD; + } + /* + * Double size because we have two buffers, note that + * zero is not an error.. 
+ */ + size = 2 * sbc_get_size(cmd, sectors); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); + cmd->t_task_nolb = sectors; + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_compare_and_write; + cmd->transport_complete_callback = compare_and_write_callback; + break; case READ_CAPACITY: size = READ_CAP_LEN; cmd->execute_cmd = sbc_emulate_readcapacity; @@ -600,7 +822,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return TCM_ADDRESS_OUT_OF_RANGE; } - size = sbc_get_size(cmd, sectors); + if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) + size = sbc_get_size(cmd, sectors); } return target_cmd_size_check(cmd, size); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 9fabbf7214cd..074539558a54 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1,7 +1,7 @@ /* * SCSI Primary Commands (SPC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -35,7 +35,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_ua.h" - +#include "target_core_xcopy.h" static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) { @@ -95,6 +95,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) */ spc_fill_alua_data(lun->lun_sep, buf); + /* + * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY + */ + if (dev->dev_attrib.emulate_3pc) + buf[5] |= 0x8; + buf[7] = 0x2; /* CmdQue=1 */ memcpy(&buf[8], "LIO-ORG ", 8); @@ -129,8 +135,8 @@ spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf) return 0; } -static void spc_parse_naa_6h_vendor_specific(struct se_device *dev, - unsigned char *buf) +void spc_parse_naa_6h_vendor_specific(struct se_device *dev, + unsigned char *buf) { unsigned char *p = &dev->t10_wwn.unit_serial[0]; int cnt; @@ -460,6 +466,11 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* Set WSNZ to 1 */ buf[4] = 0x01; + /* + * Set MAXIMUM COMPARE AND WRITE LENGTH + */ + if (dev->dev_attrib.emulate_caw) + buf[5] = 0x01; /* * Set OPTIMAL TRANSFER LENGTH GRANULARITY @@ -1250,8 +1261,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9]; break; case EXTENDED_COPY: - case READ_ATTRIBUTE: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_xcopy; + break; case RECEIVE_COPY_RESULTS: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_receive_copy_results; + break; + case READ_ATTRIBUTE: case WRITE_ATTRIBUTE: *size = (cdb[10] << 24) | (cdb[11] << 16) | (cdb[12] << 8) | cdb[13]; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index d154ce797180..9c642e02cba1 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -4,7 +4,7 @@ * Modern ConfigFS group context specific statistics based on original * target_core_mib.c code * - * 
(c) Copyright 2006-2012 RisingTide Systems LLC. + * (c) Copyright 2006-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 0d7cacb91107..250009909d49 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 task management infrastructure * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index aac9d2727e3c..b9a6ec0aa5fe 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -3,7 +3,7 @@ * * This file contains generic Target Portal Group related functions. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d8e49d79f8cc..84747cc1aac0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3,7 +3,7 @@ * * This file contains the Generic Target Engine Core. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. 
Bellinger <nab@kernel.org> * @@ -67,7 +67,6 @@ struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; static void transport_complete_task_attr(struct se_cmd *cmd); static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev); -static int transport_generic_get_mem(struct se_cmd *cmd); static int transport_put_cmd(struct se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); @@ -232,6 +231,50 @@ struct se_session *transport_init_session(void) } EXPORT_SYMBOL(transport_init_session); +int transport_alloc_session_tags(struct se_session *se_sess, + unsigned int tag_num, unsigned int tag_size) +{ + int rc; + + se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL); + if (!se_sess->sess_cmd_map) { + pr_err("Unable to allocate se_sess->sess_cmd_map\n"); + return -ENOMEM; + } + + rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num); + if (rc < 0) { + pr_err("Unable to init se_sess->sess_tag_pool," + " tag_num: %u\n", tag_num); + kfree(se_sess->sess_cmd_map); + se_sess->sess_cmd_map = NULL; + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL(transport_alloc_session_tags); + +struct se_session *transport_init_session_tags(unsigned int tag_num, + unsigned int tag_size) +{ + struct se_session *se_sess; + int rc; + + se_sess = transport_init_session(); + if (IS_ERR(se_sess)) + return se_sess; + + rc = transport_alloc_session_tags(se_sess, tag_num, tag_size); + if (rc < 0) { + transport_free_session(se_sess); + return ERR_PTR(-ENOMEM); + } + + return se_sess; +} +EXPORT_SYMBOL(transport_init_session_tags); + /* * Called with spin_lock_irqsave(&struct se_portal_group->session_lock called. 
*/ @@ -367,6 +410,10 @@ EXPORT_SYMBOL(transport_deregister_session_configfs); void transport_free_session(struct se_session *se_sess) { + if (se_sess->sess_cmd_map) { + percpu_ida_destroy(&se_sess->sess_tag_pool); + kfree(se_sess->sess_cmd_map); + } kmem_cache_free(se_sess_cache, se_sess); } EXPORT_SYMBOL(transport_free_session); @@ -1206,7 +1253,7 @@ int transport_handle_cdb_direct( } EXPORT_SYMBOL(transport_handle_cdb_direct); -static sense_reason_t +sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count) { @@ -1512,6 +1559,13 @@ void transport_generic_request_failure(struct se_cmd *cmd, * For SAM Task Attribute emulation for failed struct se_cmd */ transport_complete_task_attr(cmd); + /* + * Handle special case for COMPARE_AND_WRITE failure, where the + * callback is expected to drop the per device ->caw_mutex. + */ + if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && + cmd->transport_complete_callback) + cmd->transport_complete_callback(cmd); switch (sense_reason) { case TCM_NON_EXISTENT_LUN: @@ -1579,7 +1633,7 @@ queue_full: } EXPORT_SYMBOL(transport_generic_request_failure); -static void __target_execute_cmd(struct se_cmd *cmd) +void __target_execute_cmd(struct se_cmd *cmd) { sense_reason_t ret; @@ -1784,7 +1838,7 @@ static void transport_complete_qf(struct se_cmd *cmd) ret = cmd->se_tfo->queue_data_in(cmd); break; case DMA_TO_DEVICE: - if (cmd->t_bidi_data_sg) { + if (cmd->se_cmd_flags & SCF_BIDI) { ret = cmd->se_tfo->queue_data_in(cmd); if (ret < 0) break; @@ -1856,10 +1910,25 @@ static void target_complete_ok_work(struct work_struct *work) } /* * Check for a callback, used by amongst other things - * XDWRITE_READ_10 emulation. + * XDWRITE_READ_10 and COMPARE_AND_WRITE emulation. 
*/ - if (cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd); + if (cmd->transport_complete_callback) { + sense_reason_t rc; + + rc = cmd->transport_complete_callback(cmd); + if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) { + return; + } else if (rc) { + ret = transport_send_check_condition_and_sense(cmd, + rc, 0); + if (ret == -EAGAIN || ret == -ENOMEM) + goto queue_full; + + transport_lun_remove_cmd(cmd); + transport_cmd_check_stop_to_fabric(cmd); + return; + } + } switch (cmd->data_direction) { case DMA_FROM_DEVICE: @@ -1885,7 +1954,7 @@ static void target_complete_ok_work(struct work_struct *work) /* * Check if we need to send READ payload for BIDI-COMMAND */ - if (cmd->t_bidi_data_sg) { + if (cmd->se_cmd_flags & SCF_BIDI) { spin_lock(&cmd->se_lun->lun_sep_lock); if (cmd->se_lun->lun_sep) { cmd->se_lun->lun_sep->sep_stats.tx_data_octets += @@ -1930,10 +1999,29 @@ static inline void transport_free_sgl(struct scatterlist *sgl, int nents) kfree(sgl); } +static inline void transport_reset_sgl_orig(struct se_cmd *cmd) +{ + /* + * Check for saved t_data_sg that may be used for COMPARE_AND_WRITE + * emulation, and free + reset pointers if necessary.. 
+ */ + if (!cmd->t_data_sg_orig) + return; + + kfree(cmd->t_data_sg); + cmd->t_data_sg = cmd->t_data_sg_orig; + cmd->t_data_sg_orig = NULL; + cmd->t_data_nents = cmd->t_data_nents_orig; + cmd->t_data_nents_orig = 0; +} + static inline void transport_free_pages(struct se_cmd *cmd) { - if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) + if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { + transport_reset_sgl_orig(cmd); return; + } + transport_reset_sgl_orig(cmd); transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents); cmd->t_data_sg = NULL; @@ -2029,24 +2117,22 @@ void transport_kunmap_data_sg(struct se_cmd *cmd) } EXPORT_SYMBOL(transport_kunmap_data_sg); -static int -transport_generic_get_mem(struct se_cmd *cmd) +int +target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, + bool zero_page) { - u32 length = cmd->data_length; - unsigned int nents; + struct scatterlist *sg; struct page *page; - gfp_t zero_flag; + gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; + unsigned int nent; int i = 0; - nents = DIV_ROUND_UP(length, PAGE_SIZE); - cmd->t_data_sg = kmalloc(sizeof(struct scatterlist) * nents, GFP_KERNEL); - if (!cmd->t_data_sg) + nent = DIV_ROUND_UP(length, PAGE_SIZE); + sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL); + if (!sg) return -ENOMEM; - cmd->t_data_nents = nents; - sg_init_table(cmd->t_data_sg, nents); - - zero_flag = cmd->se_cmd_flags & SCF_SCSI_DATA_CDB ? 
0 : __GFP_ZERO; + sg_init_table(sg, nent); while (length) { u32 page_len = min_t(u32, length, PAGE_SIZE); @@ -2054,19 +2140,20 @@ transport_generic_get_mem(struct se_cmd *cmd) if (!page) goto out; - sg_set_page(&cmd->t_data_sg[i], page, page_len, 0); + sg_set_page(&sg[i], page, page_len, 0); length -= page_len; i++; } + *sgl = sg; + *nents = nent; return 0; out: while (i > 0) { i--; - __free_page(sg_page(&cmd->t_data_sg[i])); + __free_page(sg_page(&sg[i])); } - kfree(cmd->t_data_sg); - cmd->t_data_sg = NULL; + kfree(sg); return -ENOMEM; } @@ -2087,7 +2174,27 @@ transport_generic_new_cmd(struct se_cmd *cmd) */ if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) && cmd->data_length) { - ret = transport_generic_get_mem(cmd); + bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); + + if ((cmd->se_cmd_flags & SCF_BIDI) || + (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) { + u32 bidi_length; + + if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) + bidi_length = cmd->t_task_nolb * + cmd->se_dev->dev_attrib.block_size; + else + bidi_length = cmd->data_length; + + ret = target_alloc_sgl(&cmd->t_bidi_data_sg, + &cmd->t_bidi_data_nents, + bidi_length, zero_flag); + if (ret < 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + + ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, zero_flag); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } @@ -2740,6 +2847,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = asc; buffer[SPC_ASCQ_KEY_OFFSET] = ascq; break; + case TCM_MISCOMPARE_VERIFY: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE; + /* MISCOMPARE DURING VERIFY OPERATION */ + buffer[SPC_ASC_KEY_OFFSET] = 0x1d; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; + break; case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: default: /* CURRENT ERROR */ diff --git a/drivers/target/target_core_ua.c 
b/drivers/target/target_core_ua.c index bf0e390ce2d7..b04467e7547c 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -3,7 +3,7 @@ * * This file contains logic for SPC-3 Unit Attention emulation * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c new file mode 100644 index 000000000000..4d22e7d2adca --- /dev/null +++ b/drivers/target/target_core_xcopy.c @@ -0,0 +1,1081 @@ +/******************************************************************************* + * Filename: target_core_xcopy.c + * + * This file contains support for SPC-4 Extended-Copy offload with generic + * TCM backends. + * + * Copyright (c) 2011-2013 Datera, Inc. All rights reserved. + * + * Author: + * Nicholas A. Bellinger <nab@daterainc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + ******************************************************************************/ + +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/configfs.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <asm/unaligned.h> + +#include <target/target_core_base.h> +#include <target/target_core_backend.h> +#include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> + +#include "target_core_pr.h" +#include "target_core_ua.h" +#include "target_core_xcopy.h" + +static struct workqueue_struct *xcopy_wq = NULL; +/* + * From target_core_spc.c + */ +extern void spc_parse_naa_6h_vendor_specific(struct se_device *, unsigned char *); +/* + * From target_core_device.c + */ +extern struct mutex g_device_mutex; +extern struct list_head g_device_list; +/* + * From target_core_configfs.c + */ +extern struct configfs_subsystem *target_core_subsystem[]; + +static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) +{ + int off = 0; + + buf[off++] = (0x6 << 4); + buf[off++] = 0x01; + buf[off++] = 0x40; + buf[off] = (0x5 << 4); + + spc_parse_naa_6h_vendor_specific(dev, &buf[off]); + return 0; +} + +static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + bool src) +{ + struct se_device *se_dev; + struct configfs_subsystem *subsys = target_core_subsystem[0]; + unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; + int rc; + + if (src == true) + dev_wwn = &xop->dst_tid_wwn[0]; + else + dev_wwn = &xop->src_tid_wwn[0]; + + mutex_lock(&g_device_mutex); + list_for_each_entry(se_dev, &g_device_list, g_dev_node) { + + memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); + + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); + if (rc != 0) + continue; + + if (src == true) { + xop->dst_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located" + " se_dev\n", xop->dst_dev); + } 
else { + xop->src_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located" + " se_dev\n", xop->src_dev); + } + + rc = configfs_depend_item(subsys, + &se_dev->dev_group.cg_item); + if (rc != 0) { + pr_err("configfs_depend_item attempt failed:" + " %d for se_dev: %p\n", rc, se_dev); + mutex_unlock(&g_device_mutex); + return rc; + } + + pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" + " se_dev->se_dev_group: %p\n", subsys, se_dev, + &se_dev->dev_group); + + mutex_unlock(&g_device_mutex); + return 0; + } + mutex_unlock(&g_device_mutex); + + pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; +} + +static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + unsigned char *p, bool src) +{ + unsigned char *desc = p; + unsigned short ript; + u8 desig_len; + /* + * Extract RELATIVE INITIATOR PORT IDENTIFIER + */ + ript = get_unaligned_be16(&desc[2]); + pr_debug("XCOPY 0xe4: RELATIVE INITIATOR PORT IDENTIFIER: %hu\n", ript); + /* + * Check for supported code set, association, and designator type + */ + if ((desc[4] & 0x0f) != 0x1) { + pr_err("XCOPY 0xe4: code set of non binary type not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x30) != 0x00) { + pr_err("XCOPY 0xe4: association other than LUN not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x0f) != 0x3) { + pr_err("XCOPY 0xe4: designator type unsupported: 0x%02x\n", + (desc[5] & 0x0f)); + return -EINVAL; + } + /* + * Check for matching 16 byte length for NAA IEEE Registered Extended + * Assigned designator + */ + desig_len = desc[7]; + if (desig_len != 16) { + pr_err("XCOPY 0xe4: invalid desig_len: %d\n", (int)desig_len); + return -EINVAL; + } + pr_debug("XCOPY 0xe4: desig_len: %d\n", (int)desig_len); + /* + * Check for NAA IEEE Registered Extended Assigned header.. 
+ */ + if ((desc[8] & 0xf0) != 0x60) { + pr_err("XCOPY 0xe4: Unsupported DESIGNATOR TYPE: 0x%02x\n", + (desc[8] & 0xf0)); + return -EINVAL; + } + + if (src == true) { + memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the source designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->src_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_SOURCE_RECV_OP; + xop->src_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source" + " received xop\n", xop->src_dev); + } + } else { + memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the destination designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_DEST_RECV_OP; + xop->dst_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->dst_dev: %p from destination" + " received xop\n", xop->dst_dev); + } + } + + return 0; +} + +static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, + struct xcopy_op *xop, unsigned char *p, + unsigned short tdll) +{ + struct se_device *local_dev = se_cmd->se_dev; + unsigned char *desc = p; + int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0; + unsigned short start = 0; + bool src = true; + + if (offset != 0) { + pr_err("XCOPY target descriptor list length is not" + " multiple of %d\n", XCOPY_TARGET_DESC_LEN); + return -EINVAL; + } + if (tdll > 64) { + pr_err("XCOPY target descriptor supports a maximum" + " two src/dest descriptors, tdll: %hu too large..\n", tdll); + return -EINVAL; + } + /* + * Generate an IEEE Registered Extended designator based upon the + * se_device the XCOPY was received upon.. 
+ */ + memset(&xop->local_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(local_dev, &xop->local_dev_wwn[0]); + + while (start < tdll) { + /* + * Check target descriptor identification with 0xE4 type with + * use VPD 0x83 WWPN matching .. + */ + switch (desc[0]) { + case 0xe4: + rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop, + &desc[0], src); + if (rc != 0) + goto out; + /* + * Assume target descriptors are in source -> destination order.. + */ + if (src == true) + src = false; + else + src = true; + start += XCOPY_TARGET_DESC_LEN; + desc += XCOPY_TARGET_DESC_LEN; + ret++; + break; + default: + pr_err("XCOPY unsupported descriptor type code:" + " 0x%02x\n", desc[0]); + goto out; + } + } + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) + rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true); + else + rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false); + + if (rc < 0) + goto out; + + pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n", + xop->src_dev, &xop->src_tid_wwn[0]); + pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n", + xop->dst_dev, &xop->dst_tid_wwn[0]); + + return ret; + +out: + return -EINVAL; +} + +static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op *xop, + unsigned char *p) +{ + unsigned char *desc = p; + int dc = (desc[1] & 0x02); + unsigned short desc_len; + + desc_len = get_unaligned_be16(&desc[2]); + if (desc_len != 0x18) { + pr_err("XCOPY segment desc 0x02: Illegal desc_len:" + " %hu\n", desc_len); + return -EINVAL; + } + + xop->stdi = get_unaligned_be16(&desc[4]); + xop->dtdi = get_unaligned_be16(&desc[6]); + pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n", + desc_len, xop->stdi, xop->dtdi, dc); + + xop->nolb = get_unaligned_be16(&desc[10]); + xop->src_lba = get_unaligned_be64(&desc[12]); + xop->dst_lba = get_unaligned_be64(&desc[20]); + pr_debug("XCOPY seg desc 0x02: nolb: %hu src_lba: %llu dst_lba: %llu\n", + xop->nolb, (unsigned long 
long)xop->src_lba, + (unsigned long long)xop->dst_lba); + + if (dc != 0) { + xop->dbl = (desc[29] << 16) & 0xff; + xop->dbl |= (desc[30] << 8) & 0xff; + xop->dbl |= desc[31] & 0xff; + + pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl); + } + return 0; +} + +static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd, + struct xcopy_op *xop, unsigned char *p, + unsigned int sdll) +{ + unsigned char *desc = p; + unsigned int start = 0; + int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0; + + if (offset != 0) { + pr_err("XCOPY segment descriptor list length is not" + " multiple of %d\n", XCOPY_SEGMENT_DESC_LEN); + return -EINVAL; + } + + while (start < sdll) { + /* + * Check segment descriptor type code for block -> block + */ + switch (desc[0]) { + case 0x02: + rc = target_xcopy_parse_segdesc_02(se_cmd, xop, desc); + if (rc < 0) + goto out; + + ret++; + start += XCOPY_SEGMENT_DESC_LEN; + desc += XCOPY_SEGMENT_DESC_LEN; + break; + default: + pr_err("XCOPY unspported segment descriptor" + "type: 0x%02x\n", desc[0]); + goto out; + } + } + + return ret; + +out: + return -EINVAL; +} + +/* + * Start xcopy_pt ops + */ + +struct xcopy_pt_cmd { + bool remote_port; + struct se_cmd se_cmd; + struct xcopy_op *xcopy_op; + struct completion xpt_passthrough_sem; +}; + +static struct se_port xcopy_pt_port; +static struct se_portal_group xcopy_pt_tpg; +static struct se_session xcopy_pt_sess; +static struct se_node_acl xcopy_pt_nacl; + +static char *xcopy_pt_get_fabric_name(void) +{ + return "xcopy-pt"; +} + +static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) +{ + return 0; +} + +static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) +{ + struct configfs_subsystem *subsys = target_core_subsystem[0]; + struct se_device *remote_dev; + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) + remote_dev = xop->dst_dev; + else + remote_dev = xop->src_dev; + + pr_debug("Calling 
configfs_undepend_item for subsys: %p" + " remote_dev: %p remote_dev->dev_group: %p\n", + subsys, remote_dev, &remote_dev->dev_group.cg_item); + + configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); +} + +static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + if (xpt_cmd->remote_port) + kfree(se_cmd->se_lun); + + kfree(xpt_cmd); +} + +static int xcopy_pt_check_stop_free(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + complete(&xpt_cmd->xpt_passthrough_sem); + return 0; +} + +static int xcopy_pt_write_pending(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_write_pending_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_data_in(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static struct target_core_fabric_ops xcopy_pt_tfo = { + .get_fabric_name = xcopy_pt_get_fabric_name, + .get_task_tag = xcopy_pt_get_tag, + .get_cmd_state = xcopy_pt_get_cmd_state, + .release_cmd = xcopy_pt_release_cmd, + .check_stop_free = xcopy_pt_check_stop_free, + .write_pending = xcopy_pt_write_pending, + .write_pending_status = xcopy_pt_write_pending_status, + .queue_data_in = xcopy_pt_queue_data_in, + .queue_status = xcopy_pt_queue_status, +}; + +/* + * End xcopy_pt_ops + */ + +int target_xcopy_setup_pt(void) +{ + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); + if (!xcopy_wq) { + pr_err("Unable to allocate xcopy_wq\n"); + return -ENOMEM; + } + + memset(&xcopy_pt_port, 0, sizeof(struct se_port)); + INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list); + INIT_LIST_HEAD(&xcopy_pt_port.sep_list); + mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex); + + memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group)); + INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node); + INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list); + 
INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list); + + xcopy_pt_port.sep_tpg = &xcopy_pt_tpg; + xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo; + + memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl)); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list); + memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + + xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; + xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; + + xcopy_pt_sess.se_tpg = &xcopy_pt_tpg; + xcopy_pt_sess.se_node_acl = &xcopy_pt_nacl; + + return 0; +} + +void target_xcopy_release_pt(void) +{ + if (xcopy_wq) + destroy_workqueue(xcopy_wq); +} + +static void target_xcopy_setup_pt_port( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + bool remote_port) +{ + struct se_cmd *ec_cmd = xop->xop_se_cmd; + struct se_cmd *pt_cmd = &xpt_cmd->se_cmd; + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) { + /* + * Honor destination port reservations for X-COPY PUSH emulation + * when CDB is received on local source port, and READs blocks to + * WRITE on remote destination port. + */ + if (remote_port) { + xpt_cmd->remote_port = remote_port; + pt_cmd->se_lun->lun_sep = &xcopy_pt_port; + pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to" + " cmd->se_lun->lun_sep for X-COPY data PUSH\n", + pt_cmd->se_lun->lun_sep); + } else { + pt_cmd->se_lun = ec_cmd->se_lun; + pt_cmd->se_dev = ec_cmd->se_dev; + + pr_debug("Honoring local SRC port from ec_cmd->se_dev:" + " %p\n", pt_cmd->se_dev); + pt_cmd->se_lun = ec_cmd->se_lun; + pr_debug("Honoring local SRC port from ec_cmd->se_lun: %p\n", + pt_cmd->se_lun); + } + } else { + /* + * Honor source port reservation for X-COPY PULL emulation + * when CDB is received on local desintation port, and READs + * blocks from the remote source port to WRITE on local + * destination port. 
+ */ + if (remote_port) { + xpt_cmd->remote_port = remote_port; + pt_cmd->se_lun->lun_sep = &xcopy_pt_port; + pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to" + " cmd->se_lun->lun_sep for X-COPY data PULL\n", + pt_cmd->se_lun->lun_sep); + } else { + pt_cmd->se_lun = ec_cmd->se_lun; + pt_cmd->se_dev = ec_cmd->se_dev; + + pr_debug("Honoring local DST port from ec_cmd->se_dev:" + " %p\n", pt_cmd->se_dev); + pt_cmd->se_lun = ec_cmd->se_lun; + pr_debug("Honoring local DST port from ec_cmd->se_lun: %p\n", + pt_cmd->se_lun); + } + } +} + +static int target_xcopy_init_pt_lun( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + struct se_device *se_dev, + struct se_cmd *pt_cmd, + bool remote_port) +{ + /* + * Don't allocate + init an pt_cmd->se_lun if honoring local port for + * reservations. The pt_cmd->se_lun pointer will be setup from within + * target_xcopy_setup_pt_port() + */ + if (remote_port == false) { + pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; + return 0; + } + + pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL); + if (!pt_cmd->se_lun) { + pr_err("Unable to allocate pt_cmd->se_lun\n"); + return -ENOMEM; + } + init_completion(&pt_cmd->se_lun->lun_shutdown_comp); + INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list); + INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list); + spin_lock_init(&pt_cmd->se_lun->lun_acl_lock); + spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock); + spin_lock_init(&pt_cmd->se_lun->lun_sep_lock); + + pt_cmd->se_dev = se_dev; + + pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev); + pt_cmd->se_lun->lun_se_dev = se_dev; + pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; + + pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n", + pt_cmd->se_lun->lun_se_dev); + + return 0; +} + +static int target_xcopy_setup_pt_cmd( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + struct se_device *se_dev, + unsigned char *cdb, + bool remote_port, + bool alloc_mem) +{ 
+ struct se_cmd *cmd = &xpt_cmd->se_cmd; + sense_reason_t sense_rc; + int ret = 0, rc; + /* + * Setup LUN+port to honor reservations based upon xop->op_origin for + * X-COPY PUSH or X-COPY PULL based upon where the CDB was received. + */ + rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port); + if (rc < 0) { + ret = rc; + goto out; + } + xpt_cmd->xcopy_op = xop; + target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port); + + sense_rc = target_setup_cmd_from_cdb(cmd, cdb); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + if (alloc_mem) { + rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, false); + if (rc < 0) { + ret = rc; + goto out; + } + /* + * Set this bit so that transport_free_pages() allows the + * caller to release SGLs + physical memory allocated by + * transport_generic_get_mem().. + */ + cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + } else { + /* + * Here the previously allocated SGLs for the internal READ + * are mapped zero-copy to the internal WRITE. 
+ */ + sense_rc = transport_generic_map_mem_to_cmd(cmd, + xop->xop_data_sg, xop->xop_data_nents, + NULL, 0); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" + " %u\n", cmd->t_data_sg, cmd->t_data_nents); + } + + return 0; + +out: + if (remote_port == true) + kfree(cmd->se_lun); + return ret; +} + +static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) +{ + struct se_cmd *se_cmd = &xpt_cmd->se_cmd; + sense_reason_t sense_rc; + + sense_rc = transport_generic_new_cmd(se_cmd); + if (sense_rc) + return -EINVAL; + + if (se_cmd->data_direction == DMA_TO_DEVICE) + target_execute_cmd(se_cmd); + + wait_for_completion_interruptible(&xpt_cmd->xpt_passthrough_sem); + + pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n", + se_cmd->scsi_status); + return 0; +} + +static int target_xcopy_read_source( + struct se_cmd *ec_cmd, + struct xcopy_op *xop, + struct se_device *src_dev, + sector_t src_lba, + u32 src_sectors) +{ + struct xcopy_pt_cmd *xpt_cmd; + struct se_cmd *se_cmd; + u32 length = (src_sectors * src_dev->dev_attrib.block_size); + int rc; + unsigned char cdb[16]; + bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP); + + xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); + if (!xpt_cmd) { + pr_err("Unable to allocate xcopy_pt_cmd\n"); + return -ENOMEM; + } + init_completion(&xpt_cmd->xpt_passthrough_sem); + se_cmd = &xpt_cmd->se_cmd; + + memset(&cdb[0], 0, 16); + cdb[0] = READ_16; + put_unaligned_be64(src_lba, &cdb[2]); + put_unaligned_be32(src_sectors, &cdb[10]); + pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", + (unsigned long long)src_lba, src_sectors, length); + + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + DMA_FROM_DEVICE, 0, NULL); + xop->src_pt_cmd = xpt_cmd; + + rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], + remote_port, true); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + 
+ xop->xop_data_sg = se_cmd->t_data_sg; + xop->xop_data_nents = se_cmd->t_data_nents; + pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" + " memory\n", xop->xop_data_sg, xop->xop_data_nents); + + rc = target_xcopy_issue_pt_cmd(xpt_cmd); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + /* + * Clear off the allocated t_data_sg, that has been saved for + * zero-copy WRITE submission reuse in struct xcopy_op.. + */ + se_cmd->t_data_sg = NULL; + se_cmd->t_data_nents = 0; + + return 0; +} + +static int target_xcopy_write_destination( + struct se_cmd *ec_cmd, + struct xcopy_op *xop, + struct se_device *dst_dev, + sector_t dst_lba, + u32 dst_sectors) +{ + struct xcopy_pt_cmd *xpt_cmd; + struct se_cmd *se_cmd; + u32 length = (dst_sectors * dst_dev->dev_attrib.block_size); + int rc; + unsigned char cdb[16]; + bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP); + + xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); + if (!xpt_cmd) { + pr_err("Unable to allocate xcopy_pt_cmd\n"); + return -ENOMEM; + } + init_completion(&xpt_cmd->xpt_passthrough_sem); + se_cmd = &xpt_cmd->se_cmd; + + memset(&cdb[0], 0, 16); + cdb[0] = WRITE_16; + put_unaligned_be64(dst_lba, &cdb[2]); + put_unaligned_be32(dst_sectors, &cdb[10]); + pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", + (unsigned long long)dst_lba, dst_sectors, length); + + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + DMA_TO_DEVICE, 0, NULL); + xop->dst_pt_cmd = xpt_cmd; + + rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], + remote_port, false); + if (rc < 0) { + struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd; + /* + * If the failure happened before the t_mem_list hand-off in + * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that + * core releases this memory on error during X-COPY WRITE I/O. 
+ */ + src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + src_cmd->t_data_sg = xop->xop_data_sg; + src_cmd->t_data_nents = xop->xop_data_nents; + + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + rc = target_xcopy_issue_pt_cmd(xpt_cmd); + if (rc < 0) { + se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + return 0; +} + +static void target_xcopy_do_work(struct work_struct *work) +{ + struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work); + struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev; + struct se_cmd *ec_cmd = xop->xop_se_cmd; + sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba; + unsigned int max_sectors; + int rc; + unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0; + + end_lba = src_lba + nolb; + /* + * Break up XCOPY I/O into hw_max_sectors sized I/O based on the + * smallest max_sectors between src_dev + dev_dev, or + */ + max_sectors = min(src_dev->dev_attrib.hw_max_sectors, + dst_dev->dev_attrib.hw_max_sectors); + max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS); + + max_nolb = min_t(u16, max_sectors, ((u16)(~0U))); + + pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n", + nolb, max_nolb, (unsigned long long)end_lba); + pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n", + (unsigned long long)src_lba, (unsigned long long)dst_lba); + + while (src_lba < end_lba) { + cur_nolb = min(nolb, max_nolb); + + pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu," + " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb); + + rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb); + if (rc < 0) + goto out; + + src_lba += cur_nolb; + pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n", + (unsigned long long)src_lba); + + pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu," + 
" cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb); + + rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev, + dst_lba, cur_nolb); + if (rc < 0) { + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + goto out; + } + + dst_lba += cur_nolb; + pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n", + (unsigned long long)dst_lba); + + copied_nolb += cur_nolb; + nolb -= cur_nolb; + + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + + transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); + } + + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", + (unsigned long long)src_lba, (unsigned long long)dst_lba); + pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n", + copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size); + + pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n"); + target_complete_cmd(ec_cmd, SAM_STAT_GOOD); + return; + +out: + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n"); + ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); +} + +sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) +{ + struct xcopy_op *xop = NULL; + unsigned char *p = NULL, *seg_desc; + unsigned int list_id, list_id_usage, sdll, inline_dl, sa; + int rc; + unsigned short tdll; + + sa = se_cmd->t_task_cdb[1] & 0x1f; + if (sa != 0x00) { + pr_err("EXTENDED_COPY(LID4) not supported\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n"); + return TCM_OUT_OF_RESOURCES; + } + + list_id = p[0]; + if (list_id != 0x00) { + pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id); + goto out; + } 
+ list_id_usage = (p[1] & 0x18); + /* + * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH + */ + tdll = get_unaligned_be16(&p[2]); + sdll = get_unaligned_be32(&p[8]); + + inline_dl = get_unaligned_be32(&p[12]); + if (inline_dl != 0) { + pr_err("XCOPY with non zero inline data length\n"); + goto out; + } + + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); + if (!xop) { + pr_err("Unable to allocate xcopy_op\n"); + goto out; + } + xop->xop_se_cmd = se_cmd; + + pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" + " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, + tdll, sdll, inline_dl); + + rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll); + if (rc <= 0) + goto out; + + pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, + rc * XCOPY_TARGET_DESC_LEN); + seg_desc = &p[16]; + seg_desc += (rc * XCOPY_TARGET_DESC_LEN); + + rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll); + if (rc <= 0) { + xcopy_pt_undepend_remotedev(xop); + goto out; + } + transport_kunmap_data_sg(se_cmd); + + pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc, + rc * XCOPY_SEGMENT_DESC_LEN); + INIT_WORK(&xop->xop_work, target_xcopy_do_work); + queue_work(xcopy_wq, &xop->xop_work); + return TCM_NO_SENSE; + +out: + if (p) + transport_kunmap_data_sg(se_cmd); + kfree(xop); + return TCM_INVALID_CDB_FIELD; +} + +static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) +{ + unsigned char *p; + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg failed in" + " target_rcr_operating_parameters\n"); + return TCM_OUT_OF_RESOURCES; + } + + if (se_cmd->data_length < 54) { + pr_err("Receive Copy Results Op Parameters length" + " too small: %u\n", se_cmd->data_length); + transport_kunmap_data_sg(se_cmd); + return TCM_INVALID_CDB_FIELD; + } + /* + * Set SNLID=1 (Supports no List ID) + */ + p[4] = 0x1; + /* + * MAXIMUM TARGET 
DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_TARGET_DESC_COUNT, &p[8]); + /* + * MAXIMUM SEGMENT DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_SG_DESC_COUNT, &p[10]); + /* + * MAXIMUM DESCRIPTOR LIST LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_DESC_LIST_LEN, &p[12]); + /* + * MAXIMUM SEGMENT LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_SEGMENT_LEN, &p[16]); + /* + * MAXIMUM INLINE DATA LENGTH for SA 0x04 (NOT SUPPORTED) + */ + put_unaligned_be32(0x0, &p[20]); + /* + * HELD DATA LIMIT + */ + put_unaligned_be32(0x0, &p[24]); + /* + * MAXIMUM STREAM DEVICE TRANSFER SIZE + */ + put_unaligned_be32(0x0, &p[28]); + /* + * TOTAL CONCURRENT COPIES + */ + put_unaligned_be16(RCR_OP_TOTAL_CONCURR_COPIES, &p[34]); + /* + * MAXIMUM CONCURRENT COPIES + */ + p[36] = RCR_OP_MAX_CONCURR_COPIES; + /* + * DATA SEGMENT GRANULARITY (log 2) + */ + p[37] = RCR_OP_DATA_SEG_GRAN_LOG2; + /* + * INLINE DATA GRANULARITY log 2) + */ + p[38] = RCR_OP_INLINE_DATA_GRAN_LOG2; + /* + * HELD DATA GRANULARITY + */ + p[39] = RCR_OP_HELD_DATA_GRAN_LOG2; + /* + * IMPLEMENTED DESCRIPTOR LIST LENGTH + */ + p[43] = 0x2; + /* + * List of implemented descriptor type codes (ordered) + */ + p[44] = 0x02; /* Copy Block to Block device */ + p[45] = 0xe4; /* Identification descriptor target descriptor */ + + /* + * AVAILABLE DATA (n-3) + */ + put_unaligned_be32(42, &p[0]); + + transport_kunmap_data_sg(se_cmd); + target_complete_cmd(se_cmd, GOOD); + + return TCM_NO_SENSE; +} + +sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd) +{ + unsigned char *cdb = &se_cmd->t_task_cdb[0]; + int sa = (cdb[1] & 0x1f), list_id = cdb[2]; + sense_reason_t rc = TCM_NO_SENSE; + + pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:" + " 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length); + + if (list_id != 0) { + pr_err("Receive Copy Results with non zero list identifier" + " not supported\n"); + return TCM_INVALID_CDB_FIELD; + } + + switch (sa) { + case 
RCR_SA_OPERATING_PARAMETERS: + rc = target_rcr_operating_parameters(se_cmd); + break; + case RCR_SA_COPY_STATUS: + case RCR_SA_RECEIVE_DATA: + case RCR_SA_FAILED_SEGMENT_DETAILS: + default: + pr_err("Unsupported SA for receive copy results: 0x%02x\n", sa); + return TCM_INVALID_CDB_FIELD; + } + + return rc; +} diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h new file mode 100644 index 000000000000..700a981c7b41 --- /dev/null +++ b/drivers/target/target_core_xcopy.h @@ -0,0 +1,62 @@ +#define XCOPY_TARGET_DESC_LEN 32 +#define XCOPY_SEGMENT_DESC_LEN 28 +#define XCOPY_NAA_IEEE_REGEX_LEN 16 +#define XCOPY_MAX_SECTORS 1024 + +enum xcopy_origin_list { + XCOL_SOURCE_RECV_OP = 0x01, + XCOL_DEST_RECV_OP = 0x02, +}; + +struct xcopy_pt_cmd; + +struct xcopy_op { + int op_origin; + + struct se_cmd *xop_se_cmd; + struct se_device *src_dev; + unsigned char src_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct se_device *dst_dev; + unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + + sector_t src_lba; + sector_t dst_lba; + unsigned short stdi; + unsigned short dtdi; + unsigned short nolb; + unsigned int dbl; + + struct xcopy_pt_cmd *src_pt_cmd; + struct xcopy_pt_cmd *dst_pt_cmd; + + u32 xop_data_nents; + struct scatterlist *xop_data_sg; + struct work_struct xop_work; +}; + +/* + * Receive Copy Results Sevice Actions + */ +#define RCR_SA_COPY_STATUS 0x00 +#define RCR_SA_RECEIVE_DATA 0x01 +#define RCR_SA_OPERATING_PARAMETERS 0x03 +#define RCR_SA_FAILED_SEGMENT_DETAILS 0x04 + +/* + * Receive Copy Results defs for Operating Parameters + */ +#define RCR_OP_MAX_TARGET_DESC_COUNT 0x2 +#define RCR_OP_MAX_SG_DESC_COUNT 0x1 +#define RCR_OP_MAX_DESC_LIST_LEN 1024 +#define RCR_OP_MAX_SEGMENT_LEN 268435456 /* 256 MB */ +#define RCR_OP_TOTAL_CONCURR_COPIES 0x1 /* Must be <= 16384 */ +#define RCR_OP_MAX_CONCURR_COPIES 0x1 /* Must be <= 255 */ +#define RCR_OP_DATA_SEG_GRAN_LOG2 9 /* 512 bytes in log 2 */ 
+#define RCR_OP_INLINE_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ +#define RCR_OP_HELD_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ + +extern int target_xcopy_setup_pt(void); +extern void target_xcopy_release_pt(void); +extern sense_reason_t target_do_xcopy(struct se_cmd *); +extern sense_reason_t target_do_receive_copy_results(struct se_cmd *); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index b74feb0d5133..4e0050840a72 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -311,7 +311,11 @@ static struct se_portal_group *ft_add_tpg( */ if (strstr(name, "tpgt_") != name) return NULL; - if (strict_strtoul(name + 5, 10, &index) || index > UINT_MAX) + + ret = kstrtoul(name + 5, 10, &index); + if (ret) + return NULL; + if (index > UINT_MAX) return NULL; lacl = container_of(wwn, struct ft_lport_acl, fc_lport_wwn); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index e988c81d763c..dbfc390330ac 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -17,8 +17,17 @@ if THERMAL config THERMAL_HWMON bool + prompt "Expose thermal sensors as hwmon device" depends on HWMON=y || HWMON=THERMAL default y + help + In case a sensor is registered with the thermal + framework, this option will also register it + as a hwmon. The sensor will then have the common + hwmon sysfs interface. + + Say 'Y' here if you want all thermal sensors to + have hwmon sysfs interface too. choice prompt "Default Thermal governor" @@ -91,6 +100,17 @@ config THERMAL_EMULATION because userland can easily disable the thermal policy by simply flooding this sysfs node with low temperature values. +config IMX_THERMAL + tristate "Temperature sensor driver for Freescale i.MX SoCs" + depends on CPU_THERMAL + depends on MFD_SYSCON + depends on OF + help + Support for Temperature Monitor (TEMPMON) found on Freescale i.MX SoCs. + It supports one critical trip point and one passive trip point. 
The + cpufreq is used as the cooling device to throttle CPUs when the + passive trip is crossed. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -114,14 +134,6 @@ config KIRKWOOD_THERMAL Support for the Kirkwood thermal sensor driver into the Linux thermal framework. Only kirkwood 88F6282 and 88F6283 have this sensor. -config EXYNOS_THERMAL - tristate "Temperature sensor on Samsung EXYNOS" - depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) - depends on CPU_THERMAL - help - If you say yes here you get support for TMU (Thermal Management - Unit) on SAMSUNG EXYNOS series of SoC. - config DOVE_THERMAL tristate "Temperature sensor on Marvell Dove SoCs" depends on ARCH_DOVE @@ -184,4 +196,9 @@ menu "Texas Instruments thermal drivers" source "drivers/thermal/ti-soc-thermal/Kconfig" endmenu +menu "Samsung thermal drivers" +depends on PLAT_SAMSUNG +source "drivers/thermal/samsung/Kconfig" +endmenu + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 67184a293e3f..584b36319d51 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,6 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o thermal_sys-y += thermal_core.o +# interface to/from other layers providing sensors +thermal_sys-$(CONFIG_THERMAL_HWMON) += thermal_hwmon.o + # governors thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o @@ -17,10 +20,11 @@ thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o -obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-y += samsung/ obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o +obj-$(CONFIG_IMX_THERMAL) += imx_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o 
obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 82e15dbb3ac7..d17902886c3f 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -322,6 +322,8 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, if (cpumask_test_cpu(policy->cpu, ¬ify_device->allowed_cpus)) max_freq = notify_device->cpufreq_val; + else + return 0; /* Never exceed user_policy.max */ if (max_freq > policy->user_policy.max) @@ -496,8 +498,12 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { - struct cpufreq_cooling_device *cpufreq_dev = cdev->devdata; + struct cpufreq_cooling_device *cpufreq_dev; + + if (!cdev) + return; + cpufreq_dev = cdev->devdata; mutex_lock(&cooling_cpufreq_lock); cpufreq_dev_count--; diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c deleted file mode 100644 index 9af4b93c9f86..000000000000 --- a/drivers/thermal/exynos_thermal.c +++ /dev/null @@ -1,1059 +0,0 @@ -/* - * exynos_thermal.c - Samsung EXYNOS TMU (Thermal Management Unit) - * - * Copyright (C) 2011 Samsung Electronics - * Donggeun Kim <dg77.kim@samsung.com> - * Amit Daniel Kachhap <amit.kachhap@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include <linux/module.h> -#include <linux/err.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/clk.h> -#include <linux/workqueue.h> -#include <linux/sysfs.h> -#include <linux/kobject.h> -#include <linux/io.h> -#include <linux/mutex.h> -#include <linux/platform_data/exynos_thermal.h> -#include <linux/thermal.h> -#include <linux/cpufreq.h> -#include <linux/cpu_cooling.h> -#include <linux/of.h> - -/* Exynos generic registers */ -#define EXYNOS_TMU_REG_TRIMINFO 0x0 -#define EXYNOS_TMU_REG_CONTROL 0x20 -#define EXYNOS_TMU_REG_STATUS 0x28 -#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 -#define EXYNOS_TMU_REG_INTEN 0x70 -#define EXYNOS_TMU_REG_INTSTAT 0x74 -#define EXYNOS_TMU_REG_INTCLEAR 0x78 - -#define EXYNOS_TMU_TRIM_TEMP_MASK 0xff -#define EXYNOS_TMU_GAIN_SHIFT 8 -#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 -#define EXYNOS_TMU_CORE_ON 3 -#define EXYNOS_TMU_CORE_OFF 2 -#define EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET 50 - -/* Exynos4210 specific registers */ -#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58 -#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C -#define EXYNOS4210_TMU_REG_PAST_TEMP0 0x60 -#define EXYNOS4210_TMU_REG_PAST_TEMP1 0x64 -#define EXYNOS4210_TMU_REG_PAST_TEMP2 0x68 -#define EXYNOS4210_TMU_REG_PAST_TEMP3 0x6C - -#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK 0x1 -#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK 0x10 -#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK 0x100 -#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK 0x1000 -#define EXYNOS4210_TMU_INTCLEAR_VAL 0x1111 - -/* Exynos5250 and Exynos4412 specific registers */ -#define EXYNOS_TMU_TRIMINFO_CON 0x14 
-#define EXYNOS_THD_TEMP_RISE 0x50 -#define EXYNOS_THD_TEMP_FALL 0x54 -#define EXYNOS_EMUL_CON 0x80 - -#define EXYNOS_TRIMINFO_RELOAD 0x1 -#define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) -#define EXYNOS_MUX_ADDR_VALUE 6 -#define EXYNOS_MUX_ADDR_SHIFT 20 -#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 - -#define EFUSE_MIN_VALUE 40 -#define EFUSE_MAX_VALUE 100 - -/* In-kernel thermal framework related macros & definations */ -#define SENSOR_NAME_LEN 16 -#define MAX_TRIP_COUNT 8 -#define MAX_COOLING_DEVICE 4 -#define MAX_THRESHOLD_LEVS 4 - -#define ACTIVE_INTERVAL 500 -#define IDLE_INTERVAL 10000 -#define MCELSIUS 1000 - -#ifdef CONFIG_THERMAL_EMULATION -#define EXYNOS_EMUL_TIME 0x57F0 -#define EXYNOS_EMUL_TIME_SHIFT 16 -#define EXYNOS_EMUL_DATA_SHIFT 8 -#define EXYNOS_EMUL_DATA_MASK 0xFF -#define EXYNOS_EMUL_ENABLE 0x1 -#endif /* CONFIG_THERMAL_EMULATION */ - -/* CPU Zone information */ -#define PANIC_ZONE 4 -#define WARN_ZONE 3 -#define MONITOR_ZONE 2 -#define SAFE_ZONE 1 - -#define GET_ZONE(trip) (trip + 2) -#define GET_TRIP(zone) (zone - 2) - -#define EXYNOS_ZONE_COUNT 3 - -struct exynos_tmu_data { - struct exynos_tmu_platform_data *pdata; - struct resource *mem; - void __iomem *base; - int irq; - enum soc_type soc; - struct work_struct irq_work; - struct mutex lock; - struct clk *clk; - u8 temp_error1, temp_error2; -}; - -struct thermal_trip_point_conf { - int trip_val[MAX_TRIP_COUNT]; - int trip_count; - u8 trigger_falling; -}; - -struct thermal_cooling_conf { - struct freq_clip_table freq_data[MAX_TRIP_COUNT]; - int freq_clip_count; -}; - -struct thermal_sensor_conf { - char name[SENSOR_NAME_LEN]; - int (*read_temperature)(void *data); - int (*write_emul_temp)(void *drv_data, unsigned long temp); - struct thermal_trip_point_conf trip_data; - struct thermal_cooling_conf cooling_data; - void *private_data; -}; - -struct exynos_thermal_zone { - enum thermal_device_mode mode; - struct thermal_zone_device *therm_dev; - struct 
thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE]; - unsigned int cool_dev_size; - struct platform_device *exynos4_dev; - struct thermal_sensor_conf *sensor_conf; - bool bind; -}; - -static struct exynos_thermal_zone *th_zone; -static void exynos_unregister_thermal(void); -static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf); - -/* Get mode callback functions for thermal zone */ -static int exynos_get_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode *mode) -{ - if (th_zone) - *mode = th_zone->mode; - return 0; -} - -/* Set mode callback functions for thermal zone */ -static int exynos_set_mode(struct thermal_zone_device *thermal, - enum thermal_device_mode mode) -{ - if (!th_zone->therm_dev) { - pr_notice("thermal zone not registered\n"); - return 0; - } - - mutex_lock(&th_zone->therm_dev->lock); - - if (mode == THERMAL_DEVICE_ENABLED && - !th_zone->sensor_conf->trip_data.trigger_falling) - th_zone->therm_dev->polling_delay = IDLE_INTERVAL; - else - th_zone->therm_dev->polling_delay = 0; - - mutex_unlock(&th_zone->therm_dev->lock); - - th_zone->mode = mode; - thermal_zone_device_update(th_zone->therm_dev); - pr_info("thermal polling set for duration=%d msec\n", - th_zone->therm_dev->polling_delay); - return 0; -} - - -/* Get trip type callback functions for thermal zone */ -static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip, - enum thermal_trip_type *type) -{ - switch (GET_ZONE(trip)) { - case MONITOR_ZONE: - case WARN_ZONE: - *type = THERMAL_TRIP_ACTIVE; - break; - case PANIC_ZONE: - *type = THERMAL_TRIP_CRITICAL; - break; - default: - return -EINVAL; - } - return 0; -} - -/* Get trip temperature callback functions for thermal zone */ -static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) -{ - if (trip < GET_TRIP(MONITOR_ZONE) || trip > GET_TRIP(PANIC_ZONE)) - return -EINVAL; - - *temp = th_zone->sensor_conf->trip_data.trip_val[trip]; - /* 
convert the temperature into millicelsius */ - *temp = *temp * MCELSIUS; - - return 0; -} - -/* Get critical temperature callback functions for thermal zone */ -static int exynos_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - int ret; - /* Panic zone */ - ret = exynos_get_trip_temp(thermal, GET_TRIP(PANIC_ZONE), temp); - return ret; -} - -/* Bind callback functions for thermal zone */ -static int exynos_bind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - int ret = 0, i, tab_size, level; - struct freq_clip_table *tab_ptr, *clip_data; - struct thermal_sensor_conf *data = th_zone->sensor_conf; - - tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data; - tab_size = data->cooling_data.freq_clip_count; - - if (tab_ptr == NULL || tab_size == 0) - return -EINVAL; - - /* find the cooling device registered*/ - for (i = 0; i < th_zone->cool_dev_size; i++) - if (cdev == th_zone->cool_dev[i]) - break; - - /* No matching cooling device */ - if (i == th_zone->cool_dev_size) - return 0; - - /* Bind the thermal zone to the cpufreq cooling device */ - for (i = 0; i < tab_size; i++) { - clip_data = (struct freq_clip_table *)&(tab_ptr[i]); - level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max); - if (level == THERMAL_CSTATE_INVALID) - return 0; - switch (GET_ZONE(i)) { - case MONITOR_ZONE: - case WARN_ZONE: - if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, 0)) { - pr_err("error binding cdev inst %d\n", i); - ret = -EINVAL; - } - th_zone->bind = true; - break; - default: - ret = -EINVAL; - } - } - - return ret; -} - -/* Unbind callback functions for thermal zone */ -static int exynos_unbind(struct thermal_zone_device *thermal, - struct thermal_cooling_device *cdev) -{ - int ret = 0, i, tab_size; - struct thermal_sensor_conf *data = th_zone->sensor_conf; - - if (th_zone->bind == false) - return 0; - - tab_size = data->cooling_data.freq_clip_count; - - if (tab_size == 0) - return 
-EINVAL; - - /* find the cooling device registered*/ - for (i = 0; i < th_zone->cool_dev_size; i++) - if (cdev == th_zone->cool_dev[i]) - break; - - /* No matching cooling device */ - if (i == th_zone->cool_dev_size) - return 0; - - /* Bind the thermal zone to the cpufreq cooling device */ - for (i = 0; i < tab_size; i++) { - switch (GET_ZONE(i)) { - case MONITOR_ZONE: - case WARN_ZONE: - if (thermal_zone_unbind_cooling_device(thermal, i, - cdev)) { - pr_err("error unbinding cdev inst=%d\n", i); - ret = -EINVAL; - } - th_zone->bind = false; - break; - default: - ret = -EINVAL; - } - } - return ret; -} - -/* Get temperature callback functions for thermal zone */ -static int exynos_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) -{ - void *data; - - if (!th_zone->sensor_conf) { - pr_info("Temperature sensor not initialised\n"); - return -EINVAL; - } - data = th_zone->sensor_conf->private_data; - *temp = th_zone->sensor_conf->read_temperature(data); - /* convert the temperature into millicelsius */ - *temp = *temp * MCELSIUS; - return 0; -} - -/* Get temperature callback functions for thermal zone */ -static int exynos_set_emul_temp(struct thermal_zone_device *thermal, - unsigned long temp) -{ - void *data; - int ret = -EINVAL; - - if (!th_zone->sensor_conf) { - pr_info("Temperature sensor not initialised\n"); - return -EINVAL; - } - data = th_zone->sensor_conf->private_data; - if (th_zone->sensor_conf->write_emul_temp) - ret = th_zone->sensor_conf->write_emul_temp(data, temp); - return ret; -} - -/* Get the temperature trend */ -static int exynos_get_trend(struct thermal_zone_device *thermal, - int trip, enum thermal_trend *trend) -{ - int ret; - unsigned long trip_temp; - - ret = exynos_get_trip_temp(thermal, trip, &trip_temp); - if (ret < 0) - return ret; - - if (thermal->temperature >= trip_temp) - *trend = THERMAL_TREND_RAISE_FULL; - else - *trend = THERMAL_TREND_DROP_FULL; - - return 0; -} -/* Operation callback functions for thermal zone */ 
-static struct thermal_zone_device_ops const exynos_dev_ops = { - .bind = exynos_bind, - .unbind = exynos_unbind, - .get_temp = exynos_get_temp, - .set_emul_temp = exynos_set_emul_temp, - .get_trend = exynos_get_trend, - .get_mode = exynos_get_mode, - .set_mode = exynos_set_mode, - .get_trip_type = exynos_get_trip_type, - .get_trip_temp = exynos_get_trip_temp, - .get_crit_temp = exynos_get_crit_temp, -}; - -/* - * This function may be called from interrupt based temperature sensor - * when threshold is changed. - */ -static void exynos_report_trigger(void) -{ - unsigned int i; - char data[10]; - char *envp[] = { data, NULL }; - - if (!th_zone || !th_zone->therm_dev) - return; - if (th_zone->bind == false) { - for (i = 0; i < th_zone->cool_dev_size; i++) { - if (!th_zone->cool_dev[i]) - continue; - exynos_bind(th_zone->therm_dev, - th_zone->cool_dev[i]); - } - } - - thermal_zone_device_update(th_zone->therm_dev); - - mutex_lock(&th_zone->therm_dev->lock); - /* Find the level for which trip happened */ - for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) { - if (th_zone->therm_dev->last_temperature < - th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS) - break; - } - - if (th_zone->mode == THERMAL_DEVICE_ENABLED && - !th_zone->sensor_conf->trip_data.trigger_falling) { - if (i > 0) - th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; - else - th_zone->therm_dev->polling_delay = IDLE_INTERVAL; - } - - snprintf(data, sizeof(data), "%u", i); - kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp); - mutex_unlock(&th_zone->therm_dev->lock); -} - -/* Register with the in-kernel thermal management */ -static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) -{ - int ret; - struct cpumask mask_val; - - if (!sensor_conf || !sensor_conf->read_temperature) { - pr_err("Temperature sensor not initialised\n"); - return -EINVAL; - } - - th_zone = kzalloc(sizeof(struct exynos_thermal_zone), GFP_KERNEL); - if (!th_zone) - 
return -ENOMEM; - - th_zone->sensor_conf = sensor_conf; - cpumask_set_cpu(0, &mask_val); - th_zone->cool_dev[0] = cpufreq_cooling_register(&mask_val); - if (IS_ERR(th_zone->cool_dev[0])) { - pr_err("Failed to register cpufreq cooling device\n"); - ret = -EINVAL; - goto err_unregister; - } - th_zone->cool_dev_size++; - - th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, - EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - sensor_conf->trip_data.trigger_falling ? - 0 : IDLE_INTERVAL); - - if (IS_ERR(th_zone->therm_dev)) { - pr_err("Failed to register thermal zone device\n"); - ret = PTR_ERR(th_zone->therm_dev); - goto err_unregister; - } - th_zone->mode = THERMAL_DEVICE_ENABLED; - - pr_info("Exynos: Kernel Thermal management registered\n"); - - return 0; - -err_unregister: - exynos_unregister_thermal(); - return ret; -} - -/* Un-Register with the in-kernel thermal management */ -static void exynos_unregister_thermal(void) -{ - int i; - - if (!th_zone) - return; - - if (th_zone->therm_dev) - thermal_zone_device_unregister(th_zone->therm_dev); - - for (i = 0; i < th_zone->cool_dev_size; i++) { - if (th_zone->cool_dev[i]) - cpufreq_cooling_unregister(th_zone->cool_dev[i]); - } - - kfree(th_zone); - pr_info("Exynos: Kernel Thermal management unregistered\n"); -} - -/* - * TMU treats temperature as a mapped temperature code. - * The temperature is converted differently depending on the calibration type. 
- */ -static int temp_to_code(struct exynos_tmu_data *data, u8 temp) -{ - struct exynos_tmu_platform_data *pdata = data->pdata; - int temp_code; - - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp should range between 25 and 125 */ - if (temp < 25 || temp > 125) { - temp_code = -EINVAL; - goto out; - } - - switch (pdata->cal_type) { - case TYPE_TWO_POINT_TRIMMING: - temp_code = (temp - 25) * - (data->temp_error2 - data->temp_error1) / - (85 - 25) + data->temp_error1; - break; - case TYPE_ONE_POINT_TRIMMING: - temp_code = temp + data->temp_error1 - 25; - break; - default: - temp_code = temp + EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET; - break; - } -out: - return temp_code; -} - -/* - * Calculate a temperature value from a temperature code. - * The unit of the temperature is degree Celsius. - */ -static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) -{ - struct exynos_tmu_platform_data *pdata = data->pdata; - int temp; - - if (data->soc == SOC_ARCH_EXYNOS4210) - /* temp_code should range between 75 and 175 */ - if (temp_code < 75 || temp_code > 175) { - temp = -ENODATA; - goto out; - } - - switch (pdata->cal_type) { - case TYPE_TWO_POINT_TRIMMING: - temp = (temp_code - data->temp_error1) * (85 - 25) / - (data->temp_error2 - data->temp_error1) + 25; - break; - case TYPE_ONE_POINT_TRIMMING: - temp = temp_code - data->temp_error1 + 25; - break; - default: - temp = temp_code - EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET; - break; - } -out: - return temp; -} - -static int exynos_tmu_initialize(struct platform_device *pdev) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info; - unsigned int rising_threshold = 0, falling_threshold = 0; - int ret = 0, threshold_code, i, trigger_levs = 0; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - status = readb(data->base + EXYNOS_TMU_REG_STATUS); - if (!status) { - ret = -EBUSY; - goto out; - } - - if (data->soc == 
SOC_ARCH_EXYNOS) { - __raw_writel(EXYNOS_TRIMINFO_RELOAD, - data->base + EXYNOS_TMU_TRIMINFO_CON); - } - /* Save trimming info in order to perform calibration */ - trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO); - data->temp_error1 = trim_info & EXYNOS_TMU_TRIM_TEMP_MASK; - data->temp_error2 = ((trim_info >> 8) & EXYNOS_TMU_TRIM_TEMP_MASK); - - if ((EFUSE_MIN_VALUE > data->temp_error1) || - (data->temp_error1 > EFUSE_MAX_VALUE) || - (data->temp_error2 != 0)) - data->temp_error1 = pdata->efuse_value; - - /* Count trigger levels to be enabled */ - for (i = 0; i < MAX_THRESHOLD_LEVS; i++) - if (pdata->trigger_levels[i]) - trigger_levs++; - - if (data->soc == SOC_ARCH_EXYNOS4210) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->threshold); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - writeb(threshold_code, - data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - for (i = 0; i < trigger_levs; i++) - writeb(pdata->trigger_levels[i], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); - - writel(EXYNOS4210_TMU_INTCLEAR_VAL, - data->base + EXYNOS_TMU_REG_INTCLEAR); - } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for rising and falling threshold */ - for (i = 0; i < trigger_levs; i++) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= threshold_code << 8 * i; - if (pdata->threshold_falling) { - threshold_code = temp_to_code(data, - pdata->trigger_levels[i] - - pdata->threshold_falling); - if (threshold_code > 0) - falling_threshold |= - threshold_code << 8 * i; - } - } - - writel(rising_threshold, - data->base + EXYNOS_THD_TEMP_RISE); - writel(falling_threshold, - data->base + EXYNOS_THD_TEMP_FALL); - - writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, - data->base + EXYNOS_TMU_REG_INTCLEAR); - } -out: - clk_disable(data->clk); - mutex_unlock(&data->lock); 
- - return ret; -} - -static void exynos_tmu_control(struct platform_device *pdev, bool on) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int con, interrupt_en; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - con = pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT | - pdata->gain << EXYNOS_TMU_GAIN_SHIFT; - - if (data->soc == SOC_ARCH_EXYNOS) { - con |= pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT; - con |= (EXYNOS_MUX_ADDR_VALUE << EXYNOS_MUX_ADDR_SHIFT); - } - - if (on) { - con |= EXYNOS_TMU_CORE_ON; - interrupt_en = pdata->trigger_level3_en << 12 | - pdata->trigger_level2_en << 8 | - pdata->trigger_level1_en << 4 | - pdata->trigger_level0_en; - if (pdata->threshold_falling) - interrupt_en |= interrupt_en << 16; - } else { - con |= EXYNOS_TMU_CORE_OFF; - interrupt_en = 0; /* Disable all interrupts */ - } - writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN); - writel(con, data->base + EXYNOS_TMU_REG_CONTROL); - - clk_disable(data->clk); - mutex_unlock(&data->lock); -} - -static int exynos_tmu_read(struct exynos_tmu_data *data) -{ - u8 temp_code; - int temp; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - temp_code = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP); - temp = code_to_temp(data, temp_code); - - clk_disable(data->clk); - mutex_unlock(&data->lock); - - return temp; -} - -#ifdef CONFIG_THERMAL_EMULATION -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) -{ - struct exynos_tmu_data *data = drv_data; - unsigned int reg; - int ret = -EINVAL; - - if (data->soc == SOC_ARCH_EXYNOS4210) - goto out; - - if (temp && temp < MCELSIUS) - goto out; - - mutex_lock(&data->lock); - clk_enable(data->clk); - - reg = readl(data->base + EXYNOS_EMUL_CON); - - if (temp) { - temp /= MCELSIUS; - - reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | - (temp_to_code(data, temp) - << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; - } 
else { - reg &= ~EXYNOS_EMUL_ENABLE; - } - - writel(reg, data->base + EXYNOS_EMUL_CON); - - clk_disable(data->clk); - mutex_unlock(&data->lock); - return 0; -out: - return ret; -} -#else -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) - { return -EINVAL; } -#endif/*CONFIG_THERMAL_EMULATION*/ - -static void exynos_tmu_work(struct work_struct *work) -{ - struct exynos_tmu_data *data = container_of(work, - struct exynos_tmu_data, irq_work); - - exynos_report_trigger(); - mutex_lock(&data->lock); - clk_enable(data->clk); - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT | - EXYNOS_TMU_CLEAR_FALL_INT, - data->base + EXYNOS_TMU_REG_INTCLEAR); - else - writel(EXYNOS4210_TMU_INTCLEAR_VAL, - data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); - mutex_unlock(&data->lock); - - enable_irq(data->irq); -} - -static irqreturn_t exynos_tmu_irq(int irq, void *id) -{ - struct exynos_tmu_data *data = id; - - disable_irq_nosync(irq); - schedule_work(&data->irq_work); - - return IRQ_HANDLED; -} -static struct thermal_sensor_conf exynos_sensor_conf = { - .name = "exynos-therm", - .read_temperature = (int (*)(void *))exynos_tmu_read, - .write_emul_temp = exynos_tmu_set_emulation, -}; - -#if defined(CONFIG_CPU_EXYNOS4210) -static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { - .threshold = 80, - .trigger_levels[0] = 5, - .trigger_levels[1] = 20, - .trigger_levels[2] = 30, - .trigger_level0_en = 1, - .trigger_level1_en = 1, - .trigger_level2_en = 1, - .trigger_level3_en = 0, - .gain = 15, - .reference_voltage = 7, - .cal_type = TYPE_ONE_POINT_TRIMMING, - .freq_tab[0] = { - .freq_clip_max = 800 * 1000, - .temp_level = 85, - }, - .freq_tab[1] = { - .freq_clip_max = 200 * 1000, - .temp_level = 100, - }, - .freq_tab_count = 2, - .type = SOC_ARCH_EXYNOS4210, -}; -#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) -#else -#define EXYNOS4210_TMU_DRV_DATA (NULL) -#endif - -#if 
defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) || \ - defined(CONFIG_SOC_EXYNOS4212) -static struct exynos_tmu_platform_data const exynos_default_tmu_data = { - .threshold_falling = 10, - .trigger_levels[0] = 85, - .trigger_levels[1] = 103, - .trigger_levels[2] = 110, - .trigger_level0_en = 1, - .trigger_level1_en = 1, - .trigger_level2_en = 1, - .trigger_level3_en = 0, - .gain = 8, - .reference_voltage = 16, - .noise_cancel_mode = 4, - .cal_type = TYPE_ONE_POINT_TRIMMING, - .efuse_value = 55, - .freq_tab[0] = { - .freq_clip_max = 800 * 1000, - .temp_level = 85, - }, - .freq_tab[1] = { - .freq_clip_max = 200 * 1000, - .temp_level = 103, - }, - .freq_tab_count = 2, - .type = SOC_ARCH_EXYNOS, -}; -#define EXYNOS_TMU_DRV_DATA (&exynos_default_tmu_data) -#else -#define EXYNOS_TMU_DRV_DATA (NULL) -#endif - -#ifdef CONFIG_OF -static const struct of_device_id exynos_tmu_match[] = { - { - .compatible = "samsung,exynos4210-tmu", - .data = (void *)EXYNOS4210_TMU_DRV_DATA, - }, - { - .compatible = "samsung,exynos4412-tmu", - .data = (void *)EXYNOS_TMU_DRV_DATA, - }, - { - .compatible = "samsung,exynos5250-tmu", - .data = (void *)EXYNOS_TMU_DRV_DATA, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#endif - -static struct platform_device_id exynos_tmu_driver_ids[] = { - { - .name = "exynos4210-tmu", - .driver_data = (kernel_ulong_t)EXYNOS4210_TMU_DRV_DATA, - }, - { - .name = "exynos5250-tmu", - .driver_data = (kernel_ulong_t)EXYNOS_TMU_DRV_DATA, - }, - { }, -}; -MODULE_DEVICE_TABLE(platform, exynos_tmu_driver_ids); - -static inline struct exynos_tmu_platform_data *exynos_get_driver_data( - struct platform_device *pdev) -{ -#ifdef CONFIG_OF - if (pdev->dev.of_node) { - const struct of_device_id *match; - match = of_match_node(exynos_tmu_match, pdev->dev.of_node); - if (!match) - return NULL; - return (struct exynos_tmu_platform_data *) match->data; - } -#endif - return (struct exynos_tmu_platform_data *) - platform_get_device_id(pdev)->driver_data; 
-} - -static int exynos_tmu_probe(struct platform_device *pdev) -{ - struct exynos_tmu_data *data; - struct exynos_tmu_platform_data *pdata = pdev->dev.platform_data; - int ret, i; - - if (!pdata) - pdata = exynos_get_driver_data(pdev); - - if (!pdata) { - dev_err(&pdev->dev, "No platform init data supplied.\n"); - return -ENODEV; - } - data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data), - GFP_KERNEL); - if (!data) { - dev_err(&pdev->dev, "Failed to allocate driver structure\n"); - return -ENOMEM; - } - - data->irq = platform_get_irq(pdev, 0); - if (data->irq < 0) { - dev_err(&pdev->dev, "Failed to get platform irq\n"); - return data->irq; - } - - INIT_WORK(&data->irq_work, exynos_tmu_work); - - data->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - data->base = devm_ioremap_resource(&pdev->dev, data->mem); - if (IS_ERR(data->base)) - return PTR_ERR(data->base); - - ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, - IRQF_TRIGGER_RISING, "exynos-tmu", data); - if (ret) { - dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); - return ret; - } - - data->clk = devm_clk_get(&pdev->dev, "tmu_apbif"); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, "Failed to get clock\n"); - return PTR_ERR(data->clk); - } - - ret = clk_prepare(data->clk); - if (ret) - return ret; - - if (pdata->type == SOC_ARCH_EXYNOS || - pdata->type == SOC_ARCH_EXYNOS4210) - data->soc = pdata->type; - else { - ret = -EINVAL; - dev_err(&pdev->dev, "Platform not supported\n"); - goto err_clk; - } - - data->pdata = pdata; - platform_set_drvdata(pdev, data); - mutex_init(&data->lock); - - ret = exynos_tmu_initialize(pdev); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize TMU\n"); - goto err_clk; - } - - exynos_tmu_control(pdev, true); - - /* Register the sensor with thermal management interface */ - (&exynos_sensor_conf)->private_data = data; - exynos_sensor_conf.trip_data.trip_count = pdata->trigger_level0_en + - pdata->trigger_level1_en + 
pdata->trigger_level2_en + - pdata->trigger_level3_en; - - for (i = 0; i < exynos_sensor_conf.trip_data.trip_count; i++) - exynos_sensor_conf.trip_data.trip_val[i] = - pdata->threshold + pdata->trigger_levels[i]; - - exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; - - exynos_sensor_conf.cooling_data.freq_clip_count = - pdata->freq_tab_count; - for (i = 0; i < pdata->freq_tab_count; i++) { - exynos_sensor_conf.cooling_data.freq_data[i].freq_clip_max = - pdata->freq_tab[i].freq_clip_max; - exynos_sensor_conf.cooling_data.freq_data[i].temp_level = - pdata->freq_tab[i].temp_level; - } - - ret = exynos_register_thermal(&exynos_sensor_conf); - if (ret) { - dev_err(&pdev->dev, "Failed to register thermal interface\n"); - goto err_clk; - } - - return 0; -err_clk: - clk_unprepare(data->clk); - return ret; -} - -static int exynos_tmu_remove(struct platform_device *pdev) -{ - struct exynos_tmu_data *data = platform_get_drvdata(pdev); - - exynos_tmu_control(pdev, false); - - exynos_unregister_thermal(); - - clk_unprepare(data->clk); - - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int exynos_tmu_suspend(struct device *dev) -{ - exynos_tmu_control(to_platform_device(dev), false); - - return 0; -} - -static int exynos_tmu_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - - exynos_tmu_initialize(pdev); - exynos_tmu_control(pdev, true); - - return 0; -} - -static SIMPLE_DEV_PM_OPS(exynos_tmu_pm, - exynos_tmu_suspend, exynos_tmu_resume); -#define EXYNOS_TMU_PM (&exynos_tmu_pm) -#else -#define EXYNOS_TMU_PM NULL -#endif - -static struct platform_driver exynos_tmu_driver = { - .driver = { - .name = "exynos-tmu", - .owner = THIS_MODULE, - .pm = EXYNOS_TMU_PM, - .of_match_table = of_match_ptr(exynos_tmu_match), - }, - .probe = exynos_tmu_probe, - .remove = exynos_tmu_remove, - .id_table = exynos_tmu_driver_ids, -}; - -module_platform_driver(exynos_tmu_driver); - -MODULE_DESCRIPTION("EXYNOS TMU Driver"); 
-MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:exynos-tmu"); diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c new file mode 100644 index 000000000000..1d6c801c1eb9 --- /dev/null +++ b/drivers/thermal/imx_thermal.c @@ -0,0 +1,541 @@ +/* + * Copyright 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/cpu_cooling.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include <linux/types.h> + +#define REG_SET 0x4 +#define REG_CLR 0x8 +#define REG_TOG 0xc + +#define MISC0 0x0150 +#define MISC0_REFTOP_SELBIASOFF (1 << 3) + +#define TEMPSENSE0 0x0180 +#define TEMPSENSE0_ALARM_VALUE_SHIFT 20 +#define TEMPSENSE0_ALARM_VALUE_MASK (0xfff << TEMPSENSE0_ALARM_VALUE_SHIFT) +#define TEMPSENSE0_TEMP_CNT_SHIFT 8 +#define TEMPSENSE0_TEMP_CNT_MASK (0xfff << TEMPSENSE0_TEMP_CNT_SHIFT) +#define TEMPSENSE0_FINISHED (1 << 2) +#define TEMPSENSE0_MEASURE_TEMP (1 << 1) +#define TEMPSENSE0_POWER_DOWN (1 << 0) + +#define TEMPSENSE1 0x0190 +#define TEMPSENSE1_MEASURE_FREQ 0xffff + +#define OCOTP_ANA1 0x04e0 + +/* The driver supports 1 passive trip point and 1 critical trip point */ +enum imx_thermal_trip { + IMX_TRIP_PASSIVE, + IMX_TRIP_CRITICAL, + IMX_TRIP_NUM, +}; + +/* + * It defines the temperature in millicelsius for passive trip point + * that will trigger cooling action when crossed. 
+ */ +#define IMX_TEMP_PASSIVE 85000 + +#define IMX_POLLING_DELAY 2000 /* millisecond */ +#define IMX_PASSIVE_DELAY 1000 + +struct imx_thermal_data { + struct thermal_zone_device *tz; + struct thermal_cooling_device *cdev; + enum thermal_device_mode mode; + struct regmap *tempmon; + int c1, c2; /* See formula in imx_get_sensor_data() */ + unsigned long temp_passive; + unsigned long temp_critical; + unsigned long alarm_temp; + unsigned long last_temp; + bool irq_enabled; + int irq; +}; + +static void imx_set_alarm_temp(struct imx_thermal_data *data, + signed long alarm_temp) +{ + struct regmap *map = data->tempmon; + int alarm_value; + + data->alarm_temp = alarm_temp; + alarm_value = (alarm_temp - data->c2) / data->c1; + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_ALARM_VALUE_MASK); + regmap_write(map, TEMPSENSE0 + REG_SET, alarm_value << + TEMPSENSE0_ALARM_VALUE_SHIFT); +} + +static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + struct regmap *map = data->tempmon; + unsigned int n_meas; + bool wait; + u32 val; + + if (data->mode == THERMAL_DEVICE_ENABLED) { + /* Check if a measurement is currently in progress */ + regmap_read(map, TEMPSENSE0, &val); + wait = !(val & TEMPSENSE0_FINISHED); + } else { + /* + * Every time we measure the temperature, we will power on the + * temperature sensor, enable measurements, take a reading, + * disable measurements, power off the temperature sensor. + */ + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + + wait = true; + } + + /* + * According to the temp sensor designers, it may require up to ~17us + * to complete a measurement. 
+ */ + if (wait) + usleep_range(20, 50); + + regmap_read(map, TEMPSENSE0, &val); + + if (data->mode != THERMAL_DEVICE_ENABLED) { + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + } + + if ((val & TEMPSENSE0_FINISHED) == 0) { + dev_dbg(&tz->device, "temp measurement never finished\n"); + return -EAGAIN; + } + + n_meas = (val & TEMPSENSE0_TEMP_CNT_MASK) >> TEMPSENSE0_TEMP_CNT_SHIFT; + + /* See imx_get_sensor_data() for formula derivation */ + *temp = data->c2 + data->c1 * n_meas; + + /* Update alarm value to next higher trip point */ + if (data->alarm_temp == data->temp_passive && *temp >= data->temp_passive) + imx_set_alarm_temp(data, data->temp_critical); + if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) { + imx_set_alarm_temp(data, data->temp_passive); + dev_dbg(&tz->device, "thermal alarm off: T < %lu\n", + data->alarm_temp / 1000); + } + + if (*temp != data->last_temp) { + dev_dbg(&tz->device, "millicelsius: %ld\n", *temp); + data->last_temp = *temp; + } + + /* Reenable alarm IRQ if temperature below alarm temperature */ + if (!data->irq_enabled && *temp < data->alarm_temp) { + data->irq_enabled = true; + enable_irq(data->irq); + } + + return 0; +} + +static int imx_get_mode(struct thermal_zone_device *tz, + enum thermal_device_mode *mode) +{ + struct imx_thermal_data *data = tz->devdata; + + *mode = data->mode; + + return 0; +} + +static int imx_set_mode(struct thermal_zone_device *tz, + enum thermal_device_mode mode) +{ + struct imx_thermal_data *data = tz->devdata; + struct regmap *map = data->tempmon; + + if (mode == THERMAL_DEVICE_ENABLED) { + tz->polling_delay = IMX_POLLING_DELAY; + tz->passive_delay = IMX_PASSIVE_DELAY; + + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + + if (!data->irq_enabled) { + data->irq_enabled = true; + enable_irq(data->irq); + } + } 
else { + regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP); + regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN); + + tz->polling_delay = 0; + tz->passive_delay = 0; + + if (data->irq_enabled) { + disable_irq(data->irq); + data->irq_enabled = false; + } + } + + data->mode = mode; + thermal_zone_device_update(tz); + + return 0; +} + +static int imx_get_trip_type(struct thermal_zone_device *tz, int trip, + enum thermal_trip_type *type) +{ + *type = (trip == IMX_TRIP_PASSIVE) ? THERMAL_TRIP_PASSIVE : + THERMAL_TRIP_CRITICAL; + return 0; +} + +static int imx_get_crit_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + + *temp = data->temp_critical; + return 0; +} + +static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, + unsigned long *temp) +{ + struct imx_thermal_data *data = tz->devdata; + + *temp = (trip == IMX_TRIP_PASSIVE) ? data->temp_passive : + data->temp_critical; + return 0; +} + +static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, + unsigned long temp) +{ + struct imx_thermal_data *data = tz->devdata; + + if (trip == IMX_TRIP_CRITICAL) + return -EPERM; + + if (temp > IMX_TEMP_PASSIVE) + return -EINVAL; + + data->temp_passive = temp; + + imx_set_alarm_temp(data, temp); + + return 0; +} + +static int imx_bind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + int ret; + + ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev, + THERMAL_NO_LIMIT, + THERMAL_NO_LIMIT); + if (ret) { + dev_err(&tz->device, + "binding zone %s with cdev %s failed:%d\n", + tz->type, cdev->type, ret); + return ret; + } + + return 0; +} + +static int imx_unbind(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + int ret; + + ret = thermal_zone_unbind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev); + if (ret) { + dev_err(&tz->device, + "unbinding zone %s with cdev %s failed:%d\n", + tz->type, cdev->type, ret); 
+ return ret; + } + + return 0; +} + +static const struct thermal_zone_device_ops imx_tz_ops = { + .bind = imx_bind, + .unbind = imx_unbind, + .get_temp = imx_get_temp, + .get_mode = imx_get_mode, + .set_mode = imx_set_mode, + .get_trip_type = imx_get_trip_type, + .get_trip_temp = imx_get_trip_temp, + .get_crit_temp = imx_get_crit_temp, + .set_trip_temp = imx_set_trip_temp, +}; + +static int imx_get_sensor_data(struct platform_device *pdev) +{ + struct imx_thermal_data *data = platform_get_drvdata(pdev); + struct regmap *map; + int t1, t2, n1, n2; + int ret; + u32 val; + + map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "fsl,tempmon-data"); + if (IS_ERR(map)) { + ret = PTR_ERR(map); + dev_err(&pdev->dev, "failed to get sensor regmap: %d\n", ret); + return ret; + } + + ret = regmap_read(map, OCOTP_ANA1, &val); + if (ret) { + dev_err(&pdev->dev, "failed to read sensor data: %d\n", ret); + return ret; + } + + if (val == 0 || val == ~0) { + dev_err(&pdev->dev, "invalid sensor calibration data\n"); + return -EINVAL; + } + + /* + * Sensor data layout: + * [31:20] - sensor value @ 25C + * [19:8] - sensor value of hot + * [7:0] - hot temperature value + */ + n1 = val >> 20; + n2 = (val & 0xfff00) >> 8; + t2 = val & 0xff; + t1 = 25; /* t1 always 25C */ + + /* + * Derived from linear interpolation, + * Tmeas = T2 + (Nmeas - N2) * (T1 - T2) / (N1 - N2) + * We want to reduce this down to the minimum computation necessary + * for each temperature read. Also, we want Tmeas in millicelsius + * and we don't want to lose precision from integer division. So... 
+ * milli_Tmeas = 1000 * T2 + 1000 * (Nmeas - N2) * (T1 - T2) / (N1 - N2) + * Let constant c1 = 1000 * (T1 - T2) / (N1 - N2) + * milli_Tmeas = (1000 * T2) + c1 * (Nmeas - N2) + * milli_Tmeas = (1000 * T2) + (c1 * Nmeas) - (c1 * N2) + * Let constant c2 = (1000 * T2) - (c1 * N2) + * milli_Tmeas = c2 + (c1 * Nmeas) + */ + data->c1 = 1000 * (t1 - t2) / (n1 - n2); + data->c2 = 1000 * t2 - data->c1 * n2; + + /* + * Set the default passive cooling trip point to 20 °C below the + * maximum die temperature. Can be changed from userspace. + */ + data->temp_passive = 1000 * (t2 - 20); + + /* + * The maximum die temperature is t2, let's give 5 °C cushion + * for noise and possible temperature rise between measurements. + */ + data->temp_critical = 1000 * (t2 - 5); + + return 0; +} + +static irqreturn_t imx_thermal_alarm_irq(int irq, void *dev) +{ + struct imx_thermal_data *data = dev; + + disable_irq_nosync(irq); + data->irq_enabled = false; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev) +{ + struct imx_thermal_data *data = dev; + + dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n", + data->alarm_temp / 1000); + + thermal_zone_device_update(data->tz); + + return IRQ_HANDLED; +} + +static int imx_thermal_probe(struct platform_device *pdev) +{ + struct imx_thermal_data *data; + struct cpumask clip_cpus; + struct regmap *map; + int measure_freq; + int ret; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon"); + if (IS_ERR(map)) { + ret = PTR_ERR(map); + dev_err(&pdev->dev, "failed to get tempmon regmap: %d\n", ret); + return ret; + } + data->tempmon = map; + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, + imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, + 0, "imx_thermal", data); + if (ret < 0) { + 
dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, data);
+
+	ret = imx_get_sensor_data(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to get sensor data\n");
+		return ret;
+	}
+
+	/* Make sure sensor is in known good state for measurements */
+	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+	regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+	regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
+	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+	/*
+	 * clip_cpus is an automatic variable, so its contents are
+	 * indeterminate until written.  Clear it before selecting CPU 0,
+	 * otherwise stray bits are handed to the cooling device.
+	 */
+	cpumask_clear(&clip_cpus);
+	cpumask_set_cpu(0, &clip_cpus);
+	data->cdev = cpufreq_cooling_register(&clip_cpus);
+	if (IS_ERR(data->cdev)) {
+		ret = PTR_ERR(data->cdev);
+		dev_err(&pdev->dev,
+			"failed to register cpufreq cooling device: %d\n", ret);
+		return ret;
+	}
+
+	data->tz = thermal_zone_device_register("imx_thermal_zone",
+						IMX_TRIP_NUM,
+						BIT(IMX_TRIP_PASSIVE), data,
+						&imx_tz_ops, NULL,
+						IMX_PASSIVE_DELAY,
+						IMX_POLLING_DELAY);
+	if (IS_ERR(data->tz)) {
+		ret = PTR_ERR(data->tz);
+		dev_err(&pdev->dev,
+			"failed to register thermal zone device %d\n", ret);
+		/* Undo the cooling device registration done just above */
+		cpufreq_cooling_unregister(data->cdev);
+		return ret;
+	}
+
+	/* Enable measurements at ~ 10 Hz */
+	regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+	measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
+	regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq);
+	imx_set_alarm_temp(data, data->temp_passive);
+	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+	data->irq_enabled = true;
+	data->mode = THERMAL_DEVICE_ENABLED;
+
+	return 0;
+}
+
+static int imx_thermal_remove(struct platform_device *pdev)
+{
+	struct imx_thermal_data *data = platform_get_drvdata(pdev);
+	struct regmap *map = data->tempmon;
+
+	/* Disable measurements */
+	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+ + thermal_zone_device_unregister(data->tz); + cpufreq_cooling_unregister(data->cdev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int imx_thermal_suspend(struct device *dev) +{ + struct imx_thermal_data *data = dev_get_drvdata(dev); + struct regmap *map = data->tempmon; + u32 val; + + regmap_read(map, TEMPSENSE0, &val); + if ((val & TEMPSENSE0_POWER_DOWN) == 0) { + /* + * If a measurement is taking place, wait for a long enough + * time for it to finish, and then check again. If it still + * does not finish, something must go wrong. + */ + udelay(50); + regmap_read(map, TEMPSENSE0, &val); + if ((val & TEMPSENSE0_POWER_DOWN) == 0) + return -ETIMEDOUT; + } + + return 0; +} + +static int imx_thermal_resume(struct device *dev) +{ + /* Nothing to do for now */ + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops, + imx_thermal_suspend, imx_thermal_resume); + +static const struct of_device_id of_imx_thermal_match[] = { + { .compatible = "fsl,imx6q-tempmon", }, + { /* end */ } +}; + +static struct platform_driver imx_thermal = { + .driver = { + .name = "imx_thermal", + .owner = THIS_MODULE, + .pm = &imx_thermal_pm_ops, + .of_match_table = of_imx_thermal_match, + }, + .probe = imx_thermal_probe, + .remove = imx_thermal_remove, +}; +module_platform_driver(imx_thermal); + +MODULE_AUTHOR("Freescale Semiconductor, Inc."); +MODULE_DESCRIPTION("Thermal driver for Freescale i.MX SoCs"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:imx-thermal"); diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig new file mode 100644 index 000000000000..f760389a204c --- /dev/null +++ b/drivers/thermal/samsung/Kconfig @@ -0,0 +1,18 @@ +config EXYNOS_THERMAL + tristate "Exynos thermal management unit driver" + depends on ARCH_HAS_BANDGAP && OF + help + If you say yes here you get support for the TMU (Thermal Management + Unit) driver for SAMSUNG EXYNOS series of SoCs. 
This driver initialises + the TMU, reports temperature and handles cooling action if defined. + This driver uses the Exynos core thermal APIs and TMU configuration + data from the supported SoCs. + +config EXYNOS_THERMAL_CORE + bool "Core thermal framework support for EXYNOS SOCs" + depends on EXYNOS_THERMAL + help + If you say yes here you get support for EXYNOS TMU + (Thermal Management Unit) common registration/unregistration + functions to the core thermal layer and also to use the generic + CPU cooling APIs. diff --git a/drivers/thermal/samsung/Makefile b/drivers/thermal/samsung/Makefile new file mode 100644 index 000000000000..c09d83095dc2 --- /dev/null +++ b/drivers/thermal/samsung/Makefile @@ -0,0 +1,7 @@ +# +# Samsung thermal specific Makefile +# +obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +exynos_thermal-y := exynos_tmu.o +exynos_thermal-y += exynos_tmu_data.o +exynos_thermal-$(CONFIG_EXYNOS_THERMAL_CORE) += exynos_thermal_common.o diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c new file mode 100644 index 000000000000..f10a6ad37c06 --- /dev/null +++ b/drivers/thermal/samsung/exynos_thermal_common.c @@ -0,0 +1,432 @@ +/* + * exynos_thermal_common.c - Samsung EXYNOS common thermal file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/cpu_cooling.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#include "exynos_thermal_common.h" + +struct exynos_thermal_zone { + enum thermal_device_mode mode; + struct thermal_zone_device *therm_dev; + struct thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE]; + unsigned int cool_dev_size; + struct platform_device *exynos4_dev; + struct thermal_sensor_conf *sensor_conf; + bool bind; +}; + +/* Get mode callback functions for thermal zone */ +static int exynos_get_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode *mode) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + if (th_zone) + *mode = th_zone->mode; + return 0; +} + +/* Set mode callback functions for thermal zone */ +static int exynos_set_mode(struct thermal_zone_device *thermal, + enum thermal_device_mode mode) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + if (!th_zone) { + dev_err(&thermal->device, + "thermal zone not registered\n"); + return 0; + } + + mutex_lock(&thermal->lock); + + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) + thermal->polling_delay = IDLE_INTERVAL; + else + thermal->polling_delay = 0; + + mutex_unlock(&thermal->lock); + + th_zone->mode = mode; + thermal_zone_device_update(thermal); + dev_dbg(th_zone->sensor_conf->dev, + "thermal polling set for duration=%d msec\n", + thermal->polling_delay); + return 0; +} + + +/* Get trip type callback functions for thermal zone */ +static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip, + enum thermal_trip_type *type) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + int trip_type; + + if 
(trip < 0 || trip >= max_trip) + return -EINVAL; + + trip_type = th_zone->sensor_conf->trip_data.trip_type[trip]; + + if (trip_type == SW_TRIP) + *type = THERMAL_TRIP_CRITICAL; + else if (trip_type == THROTTLE_ACTIVE) + *type = THERMAL_TRIP_ACTIVE; + else if (trip_type == THROTTLE_PASSIVE) + *type = THERMAL_TRIP_PASSIVE; + else + return -EINVAL; + + return 0; +} + +/* Get trip temperature callback functions for thermal zone */ +static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip, + unsigned long *temp) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + + if (trip < 0 || trip >= max_trip) + return -EINVAL; + + *temp = th_zone->sensor_conf->trip_data.trip_val[trip]; + /* convert the temperature into millicelsius */ + *temp = *temp * MCELSIUS; + + return 0; +} + +/* Get critical temperature callback functions for thermal zone */ +static int exynos_get_crit_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + struct exynos_thermal_zone *th_zone = thermal->devdata; + int max_trip = th_zone->sensor_conf->trip_data.trip_count; + /* Get the temp of highest trip*/ + return exynos_get_trip_temp(thermal, max_trip - 1, temp); +} + +/* Bind callback functions for thermal zone */ +static int exynos_bind(struct thermal_zone_device *thermal, + struct thermal_cooling_device *cdev) +{ + int ret = 0, i, tab_size, level; + struct freq_clip_table *tab_ptr, *clip_data; + struct exynos_thermal_zone *th_zone = thermal->devdata; + struct thermal_sensor_conf *data = th_zone->sensor_conf; + + tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data; + tab_size = data->cooling_data.freq_clip_count; + + if (tab_ptr == NULL || tab_size == 0) + return 0; + + /* find the cooling device registered*/ + for (i = 0; i < th_zone->cool_dev_size; i++) + if (cdev == th_zone->cool_dev[i]) + break; + + /* No matching cooling device */ + if (i == th_zone->cool_dev_size) + return 0; + + 
/* Bind the thermal zone to the cpufreq cooling device */
+	for (i = 0; i < tab_size; i++) {
+		clip_data = (struct freq_clip_table *)&(tab_ptr[i]);
+		level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max);
+		if (level == THERMAL_CSTATE_INVALID)
+			return 0;
+		switch (GET_ZONE(i)) {
+		case MONITOR_ZONE:
+		case WARN_ZONE:
+			if (thermal_zone_bind_cooling_device(thermal, i, cdev,
+								level, 0)) {
+				/* This is the bind path: report it as such */
+				dev_err(data->dev,
+					"error binding cdev inst=%d\n", i);
+				ret = -EINVAL;
+			}
+			th_zone->bind = true;
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Unbind callback functions for thermal zone
+ *
+ * Returns 0 without doing anything when the zone was never bound, when the
+ * frequency clip table is empty, or when @cdev is not one of the cooling
+ * devices recorded in the zone.  Otherwise every trip index below
+ * freq_clip_count is unbound and -EINVAL is returned if any unbind call
+ * fails or an index maps outside MONITOR_ZONE/WARN_ZONE.
+ */
+static int exynos_unbind(struct thermal_zone_device *thermal,
+			struct thermal_cooling_device *cdev)
+{
+	int ret = 0, i, tab_size;
+	struct exynos_thermal_zone *th_zone = thermal->devdata;
+	struct thermal_sensor_conf *data = th_zone->sensor_conf;
+
+	if (th_zone->bind == false)
+		return 0;
+
+	tab_size = data->cooling_data.freq_clip_count;
+
+	if (tab_size == 0)
+		return 0;
+
+	/* find the cooling device registered*/
+	for (i = 0; i < th_zone->cool_dev_size; i++)
+		if (cdev == th_zone->cool_dev[i])
+			break;
+
+	/* No matching cooling device */
+	if (i == th_zone->cool_dev_size)
+		return 0;
+
+	/* Unbind the thermal zone from the cpufreq cooling device */
+	for (i = 0; i < tab_size; i++) {
+		switch (GET_ZONE(i)) {
+		case MONITOR_ZONE:
+		case WARN_ZONE:
+			if (thermal_zone_unbind_cooling_device(thermal, i,
+								cdev)) {
+				dev_err(data->dev,
+					"error unbinding cdev inst=%d\n", i);
+				ret = -EINVAL;
+			}
+			th_zone->bind = false;
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+	return ret;
+}
+
+/* Get temperature callback functions for thermal zone */
+static int exynos_get_temp(struct thermal_zone_device *thermal,
+			unsigned long *temp)
+{
+	struct exynos_thermal_zone *th_zone = thermal->devdata;
+	void *data;
+
+	if (!th_zone->sensor_conf) {
+		dev_err(&thermal->device,
+			"Temperature sensor not initialised\n");
+		return -EINVAL;
+	}
+	data =
th_zone->sensor_conf->driver_data; + *temp = th_zone->sensor_conf->read_temperature(data); + /* convert the temperature into millicelsius */ + *temp = *temp * MCELSIUS; + return 0; +} + +/* Get temperature callback functions for thermal zone */ +static int exynos_set_emul_temp(struct thermal_zone_device *thermal, + unsigned long temp) +{ + void *data; + int ret = -EINVAL; + struct exynos_thermal_zone *th_zone = thermal->devdata; + + if (!th_zone->sensor_conf) { + dev_err(&thermal->device, + "Temperature sensor not initialised\n"); + return -EINVAL; + } + data = th_zone->sensor_conf->driver_data; + if (th_zone->sensor_conf->write_emul_temp) + ret = th_zone->sensor_conf->write_emul_temp(data, temp); + return ret; +} + +/* Get the temperature trend */ +static int exynos_get_trend(struct thermal_zone_device *thermal, + int trip, enum thermal_trend *trend) +{ + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; + else + *trend = THERMAL_TREND_DROP_FULL; + + return 0; +} +/* Operation callback functions for thermal zone */ +static struct thermal_zone_device_ops const exynos_dev_ops = { + .bind = exynos_bind, + .unbind = exynos_unbind, + .get_temp = exynos_get_temp, + .set_emul_temp = exynos_set_emul_temp, + .get_trend = exynos_get_trend, + .get_mode = exynos_get_mode, + .set_mode = exynos_set_mode, + .get_trip_type = exynos_get_trip_type, + .get_trip_temp = exynos_get_trip_temp, + .get_crit_temp = exynos_get_crit_temp, +}; + +/* + * This function may be called from interrupt based temperature sensor + * when threshold is changed. 
+ */
+void exynos_report_trigger(struct thermal_sensor_conf *conf)
+{
+	unsigned int i;
+	char data[10];
+	char *envp[] = { data, NULL };
+	struct exynos_thermal_zone *th_zone;
+
+	if (!conf || !conf->pzone_data) {
+		pr_err("Invalid temperature sensor configuration data\n");
+		return;
+	}
+
+	th_zone = conf->pzone_data;
+	/*
+	 * Everything below dereferences therm_dev, so bail out only when
+	 * there is no thermal zone device to report to.
+	 */
+	if (!th_zone->therm_dev)
+		return;
+
+	/* Retry the cooling device binding if it has not happened yet */
+	if (th_zone->bind == false) {
+		for (i = 0; i < th_zone->cool_dev_size; i++) {
+			if (!th_zone->cool_dev[i])
+				continue;
+			exynos_bind(th_zone->therm_dev,
+					th_zone->cool_dev[i]);
+		}
+	}
+
+	thermal_zone_device_update(th_zone->therm_dev);
+
+	mutex_lock(&th_zone->therm_dev->lock);
+	/* Find the level for which trip happened */
+	for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) {
+		if (th_zone->therm_dev->last_temperature <
+			th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS)
+			break;
+	}
+
+	/* Poll faster once at least one trip point has been crossed */
+	if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
+		!th_zone->sensor_conf->trip_data.trigger_falling) {
+		if (i > 0)
+			th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
+		else
+			th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
+	}
+
+	/* Send the trip level found above as the uevent payload */
+	snprintf(data, sizeof(data), "%u", i);
+	kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp);
+	mutex_unlock(&th_zone->therm_dev->lock);
+}
+
+/*
+ * Register with the in-kernel thermal management
+ *
+ * Allocates the zone bookkeeping with devm_kzalloc(), registers one cpufreq
+ * cooling device when cooling_data.freq_clip_count is non-zero and then
+ * registers the thermal zone device.  On success the zone is stored in
+ * sensor_conf->pzone_data and 0 is returned; on failure a negative errno is
+ * returned and any cooling device registered here is unregistered again.
+ */
+int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
+{
+	int ret;
+	struct cpumask mask_val;
+	struct exynos_thermal_zone *th_zone;
+
+	if (!sensor_conf || !sensor_conf->read_temperature) {
+		pr_err("Temperature sensor not initialised\n");
+		return -EINVAL;
+	}
+
+	th_zone = devm_kzalloc(sensor_conf->dev,
+				sizeof(struct exynos_thermal_zone), GFP_KERNEL);
+	if (!th_zone)
+		return -ENOMEM;
+
+	th_zone->sensor_conf = sensor_conf;
+	/*
+	 * TODO: 1) Handle multiple cooling devices in a thermal zone
+	 *	 2) Add a flag/name in cooling info to map to specific
+	 *	 sensor
+	 */
+	if (sensor_conf->cooling_data.freq_clip_count > 0) {
+		/*
+		 * mask_val is an automatic variable: clear it first so that
+		 * CPU 0 is the only bit set in the mask we register.
+		 */
+		cpumask_clear(&mask_val);
+		cpumask_set_cpu(0, &mask_val);
+		th_zone->cool_dev[th_zone->cool_dev_size] =
+					cpufreq_cooling_register(&mask_val);
+		if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) {
+			dev_err(sensor_conf->dev,
+				"Failed to register cpufreq cooling device\n");
+			ret = -EINVAL;
+			goto err_unregister;
+		}
+		th_zone->cool_dev_size++;
+	}
+
+	th_zone->therm_dev = thermal_zone_device_register(
+			sensor_conf->name, sensor_conf->trip_data.trip_count,
+			0, th_zone, &exynos_dev_ops, NULL, 0,
+			sensor_conf->trip_data.trigger_falling ? 0 :
+			IDLE_INTERVAL);
+
+	if (IS_ERR(th_zone->therm_dev)) {
+		dev_err(sensor_conf->dev,
+			"Failed to register thermal zone device\n");
+		ret = PTR_ERR(th_zone->therm_dev);
+		goto err_unregister;
+	}
+	th_zone->mode = THERMAL_DEVICE_ENABLED;
+	sensor_conf->pzone_data = th_zone;
+
+	dev_info(sensor_conf->dev,
+		"Exynos: Thermal zone(%s) registered\n", sensor_conf->name);
+
+	return 0;
+
+err_unregister:
+	/*
+	 * This function has not assigned sensor_conf->pzone_data yet, and
+	 * therm_dev may hold an error pointer, so
+	 * exynos_unregister_thermal() cannot do the teardown.  Release the
+	 * cooling devices counted in cool_dev_size directly; th_zone itself
+	 * was allocated with devm_kzalloc().
+	 */
+	while (th_zone->cool_dev_size)
+		cpufreq_cooling_unregister(
+			th_zone->cool_dev[--th_zone->cool_dev_size]);
+	return ret;
+}
+
+/*
+ * Un-Register with the in-kernel thermal management
+ *
+ * Unregisters the thermal zone device (if any) and every non-NULL cooling
+ * device recorded in the zone stored in sensor_conf->pzone_data.
+ */
+void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf)
+{
+	int i;
+	struct exynos_thermal_zone *th_zone;
+
+	if (!sensor_conf || !sensor_conf->pzone_data) {
+		pr_err("Invalid temperature sensor configuration data\n");
+		return;
+	}
+
+	th_zone = sensor_conf->pzone_data;
+
+	if (th_zone->therm_dev)
+		thermal_zone_device_unregister(th_zone->therm_dev);
+
+	for (i = 0; i < th_zone->cool_dev_size; i++) {
+		if (th_zone->cool_dev[i])
+			cpufreq_cooling_unregister(th_zone->cool_dev[i]);
+	}
+
+	dev_info(sensor_conf->dev,
+		"Exynos: Kernel Thermal management unregistered\n");
+}
diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h
new file mode 100644
index 000000000000..3eb2ed9ea3a4
--- /dev/null
+++ b/drivers/thermal/samsung/exynos_thermal_common.h
@@ -0,0 +1,107 @@
+/*
+ * exynos_thermal_common.h - Samsung EXYNOS common header file
+ *
+ * Copyright (C) 2013 Samsung
Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _EXYNOS_THERMAL_COMMON_H +#define _EXYNOS_THERMAL_COMMON_H + +/* In-kernel thermal framework related macros & definations */ +#define SENSOR_NAME_LEN 16 +#define MAX_TRIP_COUNT 8 +#define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 5 + +#define ACTIVE_INTERVAL 500 +#define IDLE_INTERVAL 10000 +#define MCELSIUS 1000 + +/* CPU Zone information */ +#define PANIC_ZONE 4 +#define WARN_ZONE 3 +#define MONITOR_ZONE 2 +#define SAFE_ZONE 1 + +#define GET_ZONE(trip) (trip + 2) +#define GET_TRIP(zone) (zone - 2) + +enum trigger_type { + THROTTLE_ACTIVE = 1, + THROTTLE_PASSIVE, + SW_TRIP, + HW_TRIP, +}; + +/** + * struct freq_clip_table + * @freq_clip_max: maximum frequency allowed for this cooling state. + * @temp_level: Temperature level at which the temperature clipping will + * happen. + * @mask_val: cpumask of the allowed cpu's where the clipping will take place. + * + * This structure is required to be filled and passed to the + * cpufreq_cooling_unregister function. 
+ */ +struct freq_clip_table { + unsigned int freq_clip_max; + unsigned int temp_level; + const struct cpumask *mask_val; +}; + +struct thermal_trip_point_conf { + int trip_val[MAX_TRIP_COUNT]; + int trip_type[MAX_TRIP_COUNT]; + int trip_count; + unsigned char trigger_falling; +}; + +struct thermal_cooling_conf { + struct freq_clip_table freq_data[MAX_TRIP_COUNT]; + int freq_clip_count; +}; + +struct thermal_sensor_conf { + char name[SENSOR_NAME_LEN]; + int (*read_temperature)(void *data); + int (*write_emul_temp)(void *drv_data, unsigned long temp); + struct thermal_trip_point_conf trip_data; + struct thermal_cooling_conf cooling_data; + void *driver_data; + void *pzone_data; + struct device *dev; +}; + +/*Functions used exynos based thermal sensor driver*/ +#ifdef CONFIG_EXYNOS_THERMAL_CORE +void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf); +int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf); +void exynos_report_trigger(struct thermal_sensor_conf *sensor_conf); +#else +static inline void +exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf) { return; } + +static inline int +exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) { return 0; } + +static inline void +exynos_report_trigger(struct thermal_sensor_conf *sensor_conf) { return; } + +#endif /* CONFIG_EXYNOS_THERMAL_CORE */ +#endif /* _EXYNOS_THERMAL_COMMON_H */ diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c new file mode 100644 index 000000000000..b43afda8acd1 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -0,0 +1,762 @@ +/* + * exynos_tmu.c - Samsung EXYNOS TMU (Thermal Management Unit) + * + * Copyright (C) 2011 Samsung Electronics + * Donggeun Kim <dg77.kim@samsung.com> + * Amit Daniel Kachhap <amit.kachhap@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free 
Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/regulator/consumer.h> + +#include "exynos_thermal_common.h" +#include "exynos_tmu.h" +#include "exynos_tmu_data.h" + +/** + * struct exynos_tmu_data : A structure to hold the private data of the TMU + driver + * @id: identifier of the one instance of the TMU controller. + * @pdata: pointer to the tmu platform/configuration data + * @base: base address of the single instance of the TMU controller. + * @base_common: base address of the common registers of the TMU controller. + * @irq: irq number of the TMU controller. + * @soc: id of the SOC type. + * @irq_work: pointer to the irq work structure. + * @lock: lock to implement synchronization. + * @clk: pointer to the clock structure. + * @temp_error1: fused value of the first point trim. + * @temp_error2: fused value of the second point trim. + * @regulator: pointer to the TMU regulator structure. + * @reg_conf: pointer to structure to register with core thermal. 
+ */ +struct exynos_tmu_data { + int id; + struct exynos_tmu_platform_data *pdata; + void __iomem *base; + void __iomem *base_common; + int irq; + enum soc_type soc; + struct work_struct irq_work; + struct mutex lock; + struct clk *clk; + u8 temp_error1, temp_error2; + struct regulator *regulator; + struct thermal_sensor_conf *reg_conf; +}; + +/* + * TMU treats temperature as a mapped temperature code. + * The temperature is converted differently depending on the calibration type. + */ +static int temp_to_code(struct exynos_tmu_data *data, u8 temp) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int temp_code; + + if (pdata->cal_mode == HW_MODE) + return temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + /* temp should range between 25 and 125 */ + if (temp < 25 || temp > 125) { + temp_code = -EINVAL; + goto out; + } + + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + temp_code = (temp - pdata->first_point_trim) * + (data->temp_error2 - data->temp_error1) / + (pdata->second_point_trim - pdata->first_point_trim) + + data->temp_error1; + break; + case TYPE_ONE_POINT_TRIMMING: + temp_code = temp + data->temp_error1 - pdata->first_point_trim; + break; + default: + temp_code = temp + pdata->default_temp_offset; + break; + } +out: + return temp_code; +} + +/* + * Calculate a temperature value from a temperature code. + * The unit of the temperature is degree Celsius. 
+ */ +static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + int temp; + + if (pdata->cal_mode == HW_MODE) + return temp_code; + + if (data->soc == SOC_ARCH_EXYNOS4210) + /* temp_code should range between 75 and 175 */ + if (temp_code < 75 || temp_code > 175) { + temp = -ENODATA; + goto out; + } + + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + temp = (temp_code - data->temp_error1) * + (pdata->second_point_trim - pdata->first_point_trim) / + (data->temp_error2 - data->temp_error1) + + pdata->first_point_trim; + break; + case TYPE_ONE_POINT_TRIMMING: + temp = temp_code - data->temp_error1 + pdata->first_point_trim; + break; + default: + temp = temp_code - pdata->default_temp_offset; + break; + } +out: + return temp; +} + +static int exynos_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int status, trim_info = 0, con; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + if (TMU_SUPPORTS(pdata, READY_STATUS)) { + status = readb(data->base + reg->tmu_status); + if (!status) { + ret = -EBUSY; + goto out; + } + } + + if (TMU_SUPPORTS(pdata, TRIM_RELOAD)) + __raw_writel(1, data->base + reg->triminfo_ctrl); + + if (pdata->cal_mode == HW_MODE) + goto skip_calib_data; + + /* Save trimming info in order to perform calibration */ + if (data->soc == SOC_ARCH_EXYNOS5440) { + /* + * For exynos5440 soc triminfo value is swapped between TMU0 and + * TMU2, so the below logic is needed. 
+		 */
+		switch (data->id) {
+		case 0:
+			trim_info = readl(data->base +
+			EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
+			break;
+		case 1:
+			trim_info = readl(data->base + reg->triminfo_data);
+			break;
+		case 2:
+			trim_info = readl(data->base -
+			EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
+		}
+	} else {
+		trim_info = readl(data->base + reg->triminfo_data);
+	}
+	data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK;
+	data->temp_error2 = ((trim_info >> reg->triminfo_85_shift) &
+				EXYNOS_TMU_TEMP_MASK);
+
+	/* Fall back to the platform efuse value when the fused trim is unusable */
+	if (!data->temp_error1 ||
+		(pdata->min_efuse_value > data->temp_error1) ||
+		(data->temp_error1 > pdata->max_efuse_value))
+		data->temp_error1 = pdata->efuse_value & EXYNOS_TMU_TEMP_MASK;
+
+	if (!data->temp_error2)
+		data->temp_error2 =
+			(pdata->efuse_value >> reg->triminfo_85_shift) &
+			EXYNOS_TMU_TEMP_MASK;
+
+skip_calib_data:
+	if (pdata->max_trigger_level > MAX_THRESHOLD_LEVS) {
+		dev_err(&pdev->dev, "Invalid max trigger level\n");
+		/* ret is still 0 here; report the bad configuration */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < pdata->max_trigger_level; i++) {
+		if (!pdata->trigger_levels[i])
+			continue;
+
+		if ((pdata->trigger_type[i] == HW_TRIP) &&
+		(!pdata->trigger_levels[pdata->max_trigger_level - 1])) {
+			dev_err(&pdev->dev, "Invalid hw trigger level\n");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		/* Count trigger levels except the HW trip*/
+		if (!(pdata->trigger_type[i] == HW_TRIP))
+			trigger_levs++;
+	}
+
+	if (data->soc == SOC_ARCH_EXYNOS4210) {
+		/* Write temperature code for threshold */
+		threshold_code = temp_to_code(data, pdata->threshold);
+		if (threshold_code < 0) {
+			ret = threshold_code;
+			goto out;
+		}
+		writeb(threshold_code,
+			data->base + reg->threshold_temp);
+		for (i = 0; i < trigger_levs; i++)
+			writeb(pdata->trigger_levels[i], data->base +
+			reg->threshold_th0 + i * sizeof(reg->threshold_th0));
+
+		writel(reg->inten_rise_mask, data->base + reg->tmu_intclear);
+	} else {
+		/* Write temperature code for rising and falling threshold */
+		for (i = 0;
+		i < trigger_levs && i <
EXYNOS_MAX_TRIGGER_PER_REG; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } + } + + writel(rising_threshold, + data->base + reg->threshold_th0); + writel(falling_threshold, + data->base + reg->threshold_th1); + + writel((reg->inten_rise_mask << reg->inten_rise_shift) | + (reg->inten_fall_mask << reg->inten_fall_shift), + data->base + reg->tmu_intclear); + + /* if last threshold limit is also present */ + i = pdata->max_trigger_level - 1; + if (pdata->trigger_levels[i] && + (pdata->trigger_type[i] == HW_TRIP)) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) { + /* 1-4 level to be assigned in th0 reg */ + rising_threshold |= threshold_code << 8 * i; + writel(rising_threshold, + data->base + reg->threshold_th0); + } else if (i == EXYNOS_MAX_TRIGGER_PER_REG) { + /* 5th level to be assigned in th2 reg */ + rising_threshold = + threshold_code << reg->threshold_th3_l0_shift; + writel(rising_threshold, + data->base + reg->threshold_th2); + } + con = readl(data->base + reg->tmu_ctrl); + con |= (1 << reg->therm_trip_en_shift); + writel(con, data->base + reg->tmu_ctrl); + } + } + /*Clear the PMIN in the common TMU register*/ + if (reg->tmu_pmin && !data->id) + writel(0, data->base_common + reg->tmu_pmin); +out: + clk_disable(data->clk); + mutex_unlock(&data->lock); + + return ret; +} + +static void exynos_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct 
exynos_tmu_registers *reg = pdata->registers; + unsigned int con, interrupt_en, cal_val; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + con = readl(data->base + reg->tmu_ctrl); + + if (pdata->reference_voltage) { + con &= ~(reg->buf_vref_sel_mask << reg->buf_vref_sel_shift); + con |= pdata->reference_voltage << reg->buf_vref_sel_shift; + } + + if (pdata->gain) { + con &= ~(reg->buf_slope_sel_mask << reg->buf_slope_sel_shift); + con |= (pdata->gain << reg->buf_slope_sel_shift); + } + + if (pdata->noise_cancel_mode) { + con &= ~(reg->therm_trip_mode_mask << + reg->therm_trip_mode_shift); + con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift); + } + + if (pdata->cal_mode == HW_MODE) { + con &= ~(reg->calib_mode_mask << reg->calib_mode_shift); + cal_val = 0; + switch (pdata->cal_type) { + case TYPE_TWO_POINT_TRIMMING: + cal_val = 3; + break; + case TYPE_ONE_POINT_TRIMMING_85: + cal_val = 2; + break; + case TYPE_ONE_POINT_TRIMMING_25: + cal_val = 1; + break; + case TYPE_NONE: + break; + default: + dev_err(&pdev->dev, "Invalid calibration type, using none\n"); + } + con |= cal_val << reg->calib_mode_shift; + } + + if (on) { + con |= (1 << reg->core_en_shift); + interrupt_en = + pdata->trigger_enable[3] << reg->inten_rise3_shift | + pdata->trigger_enable[2] << reg->inten_rise2_shift | + pdata->trigger_enable[1] << reg->inten_rise1_shift | + pdata->trigger_enable[0] << reg->inten_rise0_shift; + if (TMU_SUPPORTS(pdata, FALLING_TRIP)) + interrupt_en |= + interrupt_en << reg->inten_fall0_shift; + } else { + con &= ~(1 << reg->core_en_shift); + interrupt_en = 0; /* Disable all interrupts */ + } + writel(interrupt_en, data->base + reg->tmu_inten); + writel(con, data->base + reg->tmu_ctrl); + + clk_disable(data->clk); + mutex_unlock(&data->lock); +} + +static int exynos_tmu_read(struct exynos_tmu_data *data) +{ + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + u8 temp_code; + int temp; + + 
mutex_lock(&data->lock); + clk_enable(data->clk); + + temp_code = readb(data->base + reg->tmu_cur_temp); + temp = code_to_temp(data, temp_code); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + + return temp; +} + +#ifdef CONFIG_THERMAL_EMULATION +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +{ + struct exynos_tmu_data *data = drv_data; + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int val; + int ret = -EINVAL; + + if (!TMU_SUPPORTS(pdata, EMULATION)) + goto out; + + if (temp && temp < MCELSIUS) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + val = readl(data->base + reg->emul_con); + + if (temp) { + temp /= MCELSIUS; + + if (TMU_SUPPORTS(pdata, EMUL_TIME)) { + val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift); + val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift); + } + val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift); + val |= (temp_to_code(data, temp) << reg->emul_temp_shift) | + EXYNOS_EMUL_ENABLE; + } else { + val &= ~EXYNOS_EMUL_ENABLE; + } + + writel(val, data->base + reg->emul_con); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + return 0; +out: + return ret; +} +#else +static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) + { return -EINVAL; } +#endif/*CONFIG_THERMAL_EMULATION*/ + +static void exynos_tmu_work(struct work_struct *work) +{ + struct exynos_tmu_data *data = container_of(work, + struct exynos_tmu_data, irq_work); + struct exynos_tmu_platform_data *pdata = data->pdata; + const struct exynos_tmu_registers *reg = pdata->registers; + unsigned int val_irq, val_type; + + /* Find which sensor generated this interrupt */ + if (reg->tmu_irqstatus) { + val_type = readl(data->base_common + reg->tmu_irqstatus); + if (!((val_type >> data->id) & 0x1)) + goto out; + } + + exynos_report_trigger(data->reg_conf); + mutex_lock(&data->lock); + clk_enable(data->clk); + + /* TODO: take 
action based on particular interrupt */ + val_irq = readl(data->base + reg->tmu_intstat); + /* clear the interrupts */ + writel(val_irq, data->base + reg->tmu_intclear); + + clk_disable(data->clk); + mutex_unlock(&data->lock); +out: + enable_irq(data->irq); +} + +static irqreturn_t exynos_tmu_irq(int irq, void *id) +{ + struct exynos_tmu_data *data = id; + + disable_irq_nosync(irq); + schedule_work(&data->irq_work); + + return IRQ_HANDLED; +} + +static const struct of_device_id exynos_tmu_match[] = { + { + .compatible = "samsung,exynos4210-tmu", + .data = (void *)EXYNOS4210_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos4412-tmu", + .data = (void *)EXYNOS5250_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos5250-tmu", + .data = (void *)EXYNOS5250_TMU_DRV_DATA, + }, + { + .compatible = "samsung,exynos5440-tmu", + .data = (void *)EXYNOS5440_TMU_DRV_DATA, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, exynos_tmu_match); + +static inline struct exynos_tmu_platform_data *exynos_get_driver_data( + struct platform_device *pdev, int id) +{ + struct exynos_tmu_init_data *data_table; + struct exynos_tmu_platform_data *tmu_data; + const struct of_device_id *match; + + match = of_match_node(exynos_tmu_match, pdev->dev.of_node); + if (!match) + return NULL; + data_table = (struct exynos_tmu_init_data *) match->data; + if (!data_table || id >= data_table->tmu_count) + return NULL; + tmu_data = data_table->tmu_data; + return (struct exynos_tmu_platform_data *) (tmu_data + id); +} + +static int exynos_map_dt_data(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata; + struct resource res; + int ret; + + if (!data || !pdev->dev.of_node) + return -ENODEV; + + /* + * Try enabling the regulator if found + * TODO: Add regulator as an SOC feature, so that regulator enable + * is a compulsory call. 
+ */ + data->regulator = devm_regulator_get(&pdev->dev, "vtmu"); + if (!IS_ERR(data->regulator)) { + ret = regulator_enable(data->regulator); + if (ret) { + dev_err(&pdev->dev, "failed to enable vtmu\n"); + return ret; + } + } else { + dev_info(&pdev->dev, "Regulator node (vtmu) not found\n"); + } + + data->id = of_alias_get_id(pdev->dev.of_node, "tmuctrl"); + if (data->id < 0) + data->id = 0; + + data->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (data->irq <= 0) { + dev_err(&pdev->dev, "failed to get IRQ\n"); + return -ENODEV; + } + + if (of_address_to_resource(pdev->dev.of_node, 0, &res)) { + dev_err(&pdev->dev, "failed to get Resource 0\n"); + return -ENODEV; + } + + data->base = devm_ioremap(&pdev->dev, res.start, resource_size(&res)); + if (!data->base) { + dev_err(&pdev->dev, "Failed to ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + pdata = exynos_get_driver_data(pdev, data->id); + if (!pdata) { + dev_err(&pdev->dev, "No platform init data supplied.\n"); + return -ENODEV; + } + data->pdata = pdata; + /* + * Check if the TMU shares some registers and then try to map the + * memory of common registers. 
+ */ + if (!TMU_SUPPORTS(pdata, SHARED_MEMORY)) + return 0; + + if (of_address_to_resource(pdev->dev.of_node, 1, &res)) { + dev_err(&pdev->dev, "failed to get Resource 1\n"); + return -ENODEV; + } + + data->base_common = devm_ioremap(&pdev->dev, res.start, + resource_size(&res)); + if (!data->base_common) { + dev_err(&pdev->dev, "Failed to ioremap memory\n"); + return -ENOMEM; + } + + return 0; +} + +static int exynos_tmu_probe(struct platform_device *pdev) +{ + struct exynos_tmu_data *data; + struct exynos_tmu_platform_data *pdata; + struct thermal_sensor_conf *sensor_conf; + int ret, i; + + data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data), + GFP_KERNEL); + if (!data) { + dev_err(&pdev->dev, "Failed to allocate driver structure\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, data); + mutex_init(&data->lock); + + ret = exynos_map_dt_data(pdev); + if (ret) + return ret; + + pdata = data->pdata; + + INIT_WORK(&data->irq_work, exynos_tmu_work); + + data->clk = devm_clk_get(&pdev->dev, "tmu_apbif"); + if (IS_ERR(data->clk)) { + dev_err(&pdev->dev, "Failed to get clock\n"); + return PTR_ERR(data->clk); + } + + ret = clk_prepare(data->clk); + if (ret) + return ret; + + if (pdata->type == SOC_ARCH_EXYNOS || + pdata->type == SOC_ARCH_EXYNOS4210 || + pdata->type == SOC_ARCH_EXYNOS5440) + data->soc = pdata->type; + else { + ret = -EINVAL; + dev_err(&pdev->dev, "Platform not supported\n"); + goto err_clk; + } + + ret = exynos_tmu_initialize(pdev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize TMU\n"); + goto err_clk; + } + + exynos_tmu_control(pdev, true); + + /* Allocate a structure to register with the exynos core thermal */ + sensor_conf = devm_kzalloc(&pdev->dev, + sizeof(struct thermal_sensor_conf), GFP_KERNEL); + if (!sensor_conf) { + dev_err(&pdev->dev, "Failed to allocate registration struct\n"); + ret = -ENOMEM; + goto err_clk; + } + sprintf(sensor_conf->name, "therm_zone%d", data->id); + sensor_conf->read_temperature = (int 
(*)(void *))exynos_tmu_read; + sensor_conf->write_emul_temp = + (int (*)(void *, unsigned long))exynos_tmu_set_emulation; + sensor_conf->driver_data = data; + sensor_conf->trip_data.trip_count = pdata->trigger_enable[0] + + pdata->trigger_enable[1] + pdata->trigger_enable[2]+ + pdata->trigger_enable[3]; + + for (i = 0; i < sensor_conf->trip_data.trip_count; i++) { + sensor_conf->trip_data.trip_val[i] = + pdata->threshold + pdata->trigger_levels[i]; + sensor_conf->trip_data.trip_type[i] = + pdata->trigger_type[i]; + } + + sensor_conf->trip_data.trigger_falling = pdata->threshold_falling; + + sensor_conf->cooling_data.freq_clip_count = pdata->freq_tab_count; + for (i = 0; i < pdata->freq_tab_count; i++) { + sensor_conf->cooling_data.freq_data[i].freq_clip_max = + pdata->freq_tab[i].freq_clip_max; + sensor_conf->cooling_data.freq_data[i].temp_level = + pdata->freq_tab[i].temp_level; + } + sensor_conf->dev = &pdev->dev; + /* Register the sensor with thermal management interface */ + ret = exynos_register_thermal(sensor_conf); + if (ret) { + dev_err(&pdev->dev, "Failed to register thermal interface\n"); + goto err_clk; + } + data->reg_conf = sensor_conf; + + ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq, + IRQF_TRIGGER_RISING | IRQF_SHARED, dev_name(&pdev->dev), data); + if (ret) { + dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq); + goto err_clk; + } + + return 0; +err_clk: + clk_unprepare(data->clk); + return ret; +} + +static int exynos_tmu_remove(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + + exynos_tmu_control(pdev, false); + + exynos_unregister_thermal(data->reg_conf); + + clk_unprepare(data->clk); + + if (!IS_ERR(data->regulator)) + regulator_disable(data->regulator); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int exynos_tmu_suspend(struct device *dev) +{ + exynos_tmu_control(to_platform_device(dev), false); + + return 0; +} + +static int exynos_tmu_resume(struct device *dev) 
+{ + struct platform_device *pdev = to_platform_device(dev); + + exynos_tmu_initialize(pdev); + exynos_tmu_control(pdev, true); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(exynos_tmu_pm, + exynos_tmu_suspend, exynos_tmu_resume); +#define EXYNOS_TMU_PM (&exynos_tmu_pm) +#else +#define EXYNOS_TMU_PM NULL +#endif + +static struct platform_driver exynos_tmu_driver = { + .driver = { + .name = "exynos-tmu", + .owner = THIS_MODULE, + .pm = EXYNOS_TMU_PM, + .of_match_table = exynos_tmu_match, + }, + .probe = exynos_tmu_probe, + .remove = exynos_tmu_remove, +}; + +module_platform_driver(exynos_tmu_driver); + +MODULE_DESCRIPTION("EXYNOS TMU Driver"); +MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:exynos-tmu"); diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h new file mode 100644 index 000000000000..b364c9eee701 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -0,0 +1,311 @@ +/* + * exynos_tmu.h - Samsung EXYNOS TMU (Thermal Management Unit) + * + * Copyright (C) 2011 Samsung Electronics + * Donggeun Kim <dg77.kim@samsung.com> + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _EXYNOS_TMU_H +#define _EXYNOS_TMU_H +#include <linux/cpu_cooling.h> + +#include "exynos_thermal_common.h" + +enum calibration_type { + TYPE_ONE_POINT_TRIMMING, + TYPE_ONE_POINT_TRIMMING_25, + TYPE_ONE_POINT_TRIMMING_85, + TYPE_TWO_POINT_TRIMMING, + TYPE_NONE, +}; + +enum calibration_mode { + SW_MODE, + HW_MODE, +}; + +enum soc_type { + SOC_ARCH_EXYNOS4210 = 1, + SOC_ARCH_EXYNOS, + SOC_ARCH_EXYNOS5440, +}; + +/** + * EXYNOS TMU supported features. + * TMU_SUPPORT_EMULATION - This features is used to set user defined + * temperature to the TMU controller. + * TMU_SUPPORT_MULTI_INST - This features denotes that the soc + * has many instances of TMU. + * TMU_SUPPORT_TRIM_RELOAD - This features shows that trimming can + * be reloaded. + * TMU_SUPPORT_FALLING_TRIP - This features shows that interrupt can + * be registered for falling trips also. + * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current + * state(active/idle) can be checked. + * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation + * sample time. + * TMU_SUPPORT_SHARED_MEMORY - This feature tells that the different TMU + * sensors shares some common registers. + * TMU_SUPPORT - macro to compare the above features with the supplied. + */ +#define TMU_SUPPORT_EMULATION BIT(0) +#define TMU_SUPPORT_MULTI_INST BIT(1) +#define TMU_SUPPORT_TRIM_RELOAD BIT(2) +#define TMU_SUPPORT_FALLING_TRIP BIT(3) +#define TMU_SUPPORT_READY_STATUS BIT(4) +#define TMU_SUPPORT_EMUL_TIME BIT(5) +#define TMU_SUPPORT_SHARED_MEMORY BIT(6) + +#define TMU_SUPPORTS(a, b) (a->features & TMU_SUPPORT_ ## b) + +/** + * struct exynos_tmu_register - register descriptors to access registers and + * bitfields. 
The register validity, offsets and bitfield values may vary + * slightly across different exynos SOC's. + * @triminfo_data: register containing 2 point trimming data + * @triminfo_25_shift: shift bit of the 25 C trim value in triminfo_data reg. + * @triminfo_85_shift: shift bit of the 85 C trim value in triminfo_data reg. + * @triminfo_ctrl: trim info controller register. + * @triminfo_reload_shift: shift of triminfo reload enable bit in triminfo_ctrl + reg. + * @tmu_ctrl: TMU main controller register. + * @buf_vref_sel_shift: shift bits of reference voltage in tmu_ctrl register. + * @buf_vref_sel_mask: mask bits of reference voltage in tmu_ctrl register. + * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register. + * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register. + * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register. + * @buf_slope_sel_shift: shift bits of amplifier gain value in tmu_ctrl + register. + * @buf_slope_sel_mask: mask bits of amplifier gain value in tmu_ctrl register. + * @calib_mode_shift: shift bits of calibration mode value in tmu_ctrl + register. + * @calib_mode_mask: mask bits of calibration mode value in tmu_ctrl + register. + * @therm_trip_tq_en_shift: shift bits of thermal trip enable by TQ pin in + tmu_ctrl register. + * @core_en_shift: shift bits of TMU core enable bit in tmu_ctrl register. + * @tmu_status: register describing the TMU status. + * @tmu_cur_temp: register containing the current temperature of the TMU. + * @tmu_cur_temp_shift: shift bits of current temp value in tmu_cur_temp + register. + * @threshold_temp: register containing the base threshold level. + * @threshold_th0: Register containing first set of rising levels. + * @threshold_th0_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th0_l1_shift: shift bits of level1 threshold temperature. + * @threshold_th0_l2_shift: shift bits of level2 threshold temperature.
+ * @threshold_th0_l3_shift: shift bits of level3 threshold temperature. + * @threshold_th1: Register containing second set of rising levels. + * @threshold_th1_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th1_l1_shift: shift bits of level1 threshold temperature. + * @threshold_th1_l2_shift: shift bits of level2 threshold temperature. + * @threshold_th1_l3_shift: shift bits of level3 threshold temperature. + * @threshold_th2: Register containing third set of rising levels. + * @threshold_th2_l0_shift: shift bits of level0 threshold temperature. + * @threshold_th3: Register containing fourth set of rising levels. + * @threshold_th3_l0_shift: shift bits of level0 threshold temperature. + * @tmu_inten: register containing the different threshold interrupt + enable bits. + * @inten_rise_shift: shift bits of all rising interrupt bits. + * @inten_rise_mask: mask bits of all rising interrupt bits. + * @inten_fall_shift: shift bits of all falling interrupt bits. + * @inten_fall_mask: mask bits of all falling interrupt bits. + * @inten_rise0_shift: shift bits of rising 0 interrupt bits. + * @inten_rise1_shift: shift bits of rising 1 interrupt bits. + * @inten_rise2_shift: shift bits of rising 2 interrupt bits. + * @inten_rise3_shift: shift bits of rising 3 interrupt bits. + * @inten_fall0_shift: shift bits of falling 0 interrupt bits. + * @inten_fall1_shift: shift bits of falling 1 interrupt bits. + * @inten_fall2_shift: shift bits of falling 2 interrupt bits. + * @inten_fall3_shift: shift bits of falling 3 interrupt bits. + * @tmu_intstat: Register containing the interrupt status values. + * @tmu_intclear: Register for clearing the raised interrupt status. + * @emul_con: TMU emulation controller register. + * @emul_temp_shift: shift bits of emulation temperature. + * @emul_time_shift: shift bits of emulation time. + * @emul_time_mask: mask bits of emulation time. + * @tmu_irqstatus: register to find which TMU generated interrupts.
+ * @tmu_pmin: register to get/set the Pmin value. + */ +struct exynos_tmu_registers { + u32 triminfo_data; + u32 triminfo_25_shift; + u32 triminfo_85_shift; + + u32 triminfo_ctrl; + u32 triminfo_reload_shift; + + u32 tmu_ctrl; + u32 buf_vref_sel_shift; + u32 buf_vref_sel_mask; + u32 therm_trip_mode_shift; + u32 therm_trip_mode_mask; + u32 therm_trip_en_shift; + u32 buf_slope_sel_shift; + u32 buf_slope_sel_mask; + u32 calib_mode_shift; + u32 calib_mode_mask; + u32 therm_trip_tq_en_shift; + u32 core_en_shift; + + u32 tmu_status; + + u32 tmu_cur_temp; + u32 tmu_cur_temp_shift; + + u32 threshold_temp; + + u32 threshold_th0; + u32 threshold_th0_l0_shift; + u32 threshold_th0_l1_shift; + u32 threshold_th0_l2_shift; + u32 threshold_th0_l3_shift; + + u32 threshold_th1; + u32 threshold_th1_l0_shift; + u32 threshold_th1_l1_shift; + u32 threshold_th1_l2_shift; + u32 threshold_th1_l3_shift; + + u32 threshold_th2; + u32 threshold_th2_l0_shift; + + u32 threshold_th3; + u32 threshold_th3_l0_shift; + + u32 tmu_inten; + u32 inten_rise_shift; + u32 inten_rise_mask; + u32 inten_fall_shift; + u32 inten_fall_mask; + u32 inten_rise0_shift; + u32 inten_rise1_shift; + u32 inten_rise2_shift; + u32 inten_rise3_shift; + u32 inten_fall0_shift; + u32 inten_fall1_shift; + u32 inten_fall2_shift; + u32 inten_fall3_shift; + + u32 tmu_intstat; + + u32 tmu_intclear; + + u32 emul_con; + u32 emul_temp_shift; + u32 emul_time_shift; + u32 emul_time_mask; + + u32 tmu_irqstatus; + u32 tmu_pmin; +}; + +/** + * struct exynos_tmu_platform_data + * @threshold: basic temperature for generating interrupt + * 25 <= threshold <= 125 [unit: degree Celsius] + * @threshold_falling: differntial value for setting threshold + * of temperature falling interrupt. 
+ * @trigger_levels: array for each interrupt level + * [unit: degree Celsius] + * 0: temperature for trigger_level0 interrupt + * condition for trigger_level0 interrupt: + * current temperature > threshold + trigger_levels[0] + * 1: temperature for trigger_level1 interrupt + * condition for trigger_level1 interrupt: + * current temperature > threshold + trigger_levels[1] + * 2: temperature for trigger_level2 interrupt + * condition for trigger_level2 interrupt: + * current temperature > threshold + trigger_levels[2] + * 3: temperature for trigger_level3 interrupt + * condition for trigger_level3 interrupt: + * current temperature > threshold + trigger_levels[3] + * @trigger_type: defines the type of trigger. Possible values are, + * THROTTLE_ACTIVE trigger type + * THROTTLE_PASSIVE trigger type + * SW_TRIP trigger type + * HW_TRIP + * @trigger_enable[]: array to denote which trigger levels are enabled. + * 1 = enable trigger_level[] interrupt, + * 0 = disable trigger_level[] interrupt + * @max_trigger_level: max trigger level supported by the TMU + * @gain: gain of amplifier in the positive-TC generator block + * 0 <= gain <= 15 + * @reference_voltage: reference voltage of amplifier + * in the positive-TC generator block + * 0 <= reference_voltage <= 31 + * @noise_cancel_mode: noise cancellation mode + * 000, 100, 101, 110 and 111 can be different modes + * @type: determines the type of SOC + * @efuse_value: platform defined fuse value + * @min_efuse_value: minimum valid trimming data + * @max_efuse_value: maximum valid trimming data + * @first_point_trim: temp value of the first point trimming + * @second_point_trim: temp value of the second point trimming + * @default_temp_offset: default temperature offset in case of no trimming + * @cal_type: calibration type for temperature + * @cal_mode: calibration mode for temperature + * @freq_tab: Table representing frequency reduction percentage.
+ * @freq_tab_count: Count of the above table as frequency reduction may + * applicable to only some of the trigger levels. + * @registers: Pointer to structure containing all the TMU controller registers + * and bitfields shifts and masks. + * @features: a bitfield value indicating the features supported in SOC like + * emulation, multi instance etc + * + * This structure is required for configuration of exynos_tmu driver. + */ +struct exynos_tmu_platform_data { + u8 threshold; + u8 threshold_falling; + u8 trigger_levels[MAX_TRIP_COUNT]; + enum trigger_type trigger_type[MAX_TRIP_COUNT]; + bool trigger_enable[MAX_TRIP_COUNT]; + u8 max_trigger_level; + u8 gain; + u8 reference_voltage; + u8 noise_cancel_mode; + + u32 efuse_value; + u32 min_efuse_value; + u32 max_efuse_value; + u8 first_point_trim; + u8 second_point_trim; + u8 default_temp_offset; + + enum calibration_type cal_type; + enum calibration_mode cal_mode; + enum soc_type type; + struct freq_clip_table freq_tab[4]; + unsigned int freq_tab_count; + const struct exynos_tmu_registers *registers; + unsigned int features; +}; + +/** + * struct exynos_tmu_init_data + * @tmu_count: number of TMU instances. + * @tmu_data: platform data of all TMU instances. + * This structure is required to store data for multi-instance exynos tmu + * driver. 
+ */ +struct exynos_tmu_init_data { + int tmu_count; + struct exynos_tmu_platform_data tmu_data[]; +}; + +#endif /* _EXYNOS_TMU_H */ diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c new file mode 100644 index 000000000000..9002499c1f69 --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu_data.c @@ -0,0 +1,250 @@ +/* + * exynos_tmu_data.c - Samsung EXYNOS tmu data file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "exynos_thermal_common.h" +#include "exynos_tmu.h" +#include "exynos_tmu_data.h" + +#if defined(CONFIG_CPU_EXYNOS4210) +static const struct exynos_tmu_registers exynos4210_tmu_registers = { + .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS_TMU_REG_STATUS, + .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, + .threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP, + .threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0, + .tmu_inten = EXYNOS_TMU_REG_INTEN, + .inten_rise_mask = EXYNOS4210_TMU_TRIG_LEVEL_MASK, + .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, + .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, + .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, +}; + +struct exynos_tmu_init_data const exynos4210_default_tmu_data = { + .tmu_data = { + { + .threshold = 80, + .trigger_levels[0] = 5, + .trigger_levels[1] = 20, + .trigger_levels[2] = 30, + .trigger_enable[0] = true, + .trigger_enable[1] = true, + .trigger_enable[2] = true, + .trigger_enable[3] = false, + .trigger_type[0] = THROTTLE_ACTIVE, + .trigger_type[1] = THROTTLE_ACTIVE, + .trigger_type[2] = SW_TRIP, + .max_trigger_level = 4, + .gain = 15, + .reference_voltage = 7, + .cal_type = TYPE_ONE_POINT_TRIMMING, + .min_efuse_value = 40, + 
.max_efuse_value = 100, + .first_point_trim = 25, + .second_point_trim = 85, + .default_temp_offset = 50, + .freq_tab[0] = { + .freq_clip_max = 800 * 1000, + .temp_level = 85, + }, + .freq_tab[1] = { + .freq_clip_max = 200 * 1000, + .temp_level = 100, + }, + .freq_tab_count = 2, + .type = SOC_ARCH_EXYNOS4210, + .registers = &exynos4210_tmu_registers, + .features = TMU_SUPPORT_READY_STATUS, + }, + }, + .tmu_count = 1, +}; +#endif + +#if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) +static const struct exynos_tmu_registers exynos5250_tmu_registers = { + .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .triminfo_ctrl = EXYNOS_TMU_TRIMINFO_CON, + .triminfo_reload_shift = EXYNOS_TRIMINFO_RELOAD_SHIFT, + .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, + .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, + .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS_TMU_REG_STATUS, + .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, + .threshold_th0 = EXYNOS_THD_TEMP_RISE, + .threshold_th1 = EXYNOS_THD_TEMP_FALL, + .tmu_inten = EXYNOS_TMU_REG_INTEN, + .inten_rise_mask = EXYNOS_TMU_RISE_INT_MASK, + .inten_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT, + .inten_fall_mask = EXYNOS_TMU_FALL_INT_MASK, + .inten_fall_shift = EXYNOS_TMU_FALL_INT_SHIFT, + .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT, + .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, + .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, + 
.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, + .emul_con = EXYNOS_EMUL_CON, + .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, + .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, + .emul_time_mask = EXYNOS_EMUL_TIME_MASK, +}; + +#define EXYNOS5250_TMU_DATA \ + .threshold_falling = 10, \ + .trigger_levels[0] = 85, \ + .trigger_levels[1] = 103, \ + .trigger_levels[2] = 110, \ + .trigger_levels[3] = 120, \ + .trigger_enable[0] = true, \ + .trigger_enable[1] = true, \ + .trigger_enable[2] = true, \ + .trigger_enable[3] = false, \ + .trigger_type[0] = THROTTLE_ACTIVE, \ + .trigger_type[1] = THROTTLE_ACTIVE, \ + .trigger_type[2] = SW_TRIP, \ + .trigger_type[3] = HW_TRIP, \ + .max_trigger_level = 4, \ + .gain = 8, \ + .reference_voltage = 16, \ + .noise_cancel_mode = 4, \ + .cal_type = TYPE_ONE_POINT_TRIMMING, \ + .efuse_value = 55, \ + .min_efuse_value = 40, \ + .max_efuse_value = 100, \ + .first_point_trim = 25, \ + .second_point_trim = 85, \ + .default_temp_offset = 50, \ + .freq_tab[0] = { \ + .freq_clip_max = 800 * 1000, \ + .temp_level = 85, \ + }, \ + .freq_tab[1] = { \ + .freq_clip_max = 200 * 1000, \ + .temp_level = 103, \ + }, \ + .freq_tab_count = 2, \ + .type = SOC_ARCH_EXYNOS, \ + .registers = &exynos5250_tmu_registers, \ + .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \ + TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ + TMU_SUPPORT_EMUL_TIME) + +struct exynos_tmu_init_data const exynos5250_default_tmu_data = { + .tmu_data = { + { EXYNOS5250_TMU_DATA }, + }, + .tmu_count = 1, +}; +#endif + +#if defined(CONFIG_SOC_EXYNOS5440) +static const struct exynos_tmu_registers exynos5440_tmu_registers = { + .triminfo_data = EXYNOS5440_TMU_S0_7_TRIM, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, + 
.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, + .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .calib_mode_shift = EXYNOS_TMU_CALIB_MODE_SHIFT, + .calib_mode_mask = EXYNOS_TMU_CALIB_MODE_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS5440_TMU_S0_7_STATUS, + .tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP, + .threshold_th0 = EXYNOS5440_TMU_S0_7_TH0, + .threshold_th1 = EXYNOS5440_TMU_S0_7_TH1, + .threshold_th2 = EXYNOS5440_TMU_S0_7_TH2, + .threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT, + .tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN, + .inten_rise_mask = EXYNOS5440_TMU_RISE_INT_MASK, + .inten_rise_shift = EXYNOS5440_TMU_RISE_INT_SHIFT, + .inten_fall_mask = EXYNOS5440_TMU_FALL_INT_MASK, + .inten_fall_shift = EXYNOS5440_TMU_FALL_INT_SHIFT, + .inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT, + .inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT, + .inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT, + .tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ, + .tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ, + .tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS, + .emul_con = EXYNOS5440_TMU_S0_7_DEBUG, + .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, + .tmu_pmin = EXYNOS5440_TMU_PMIN, +}; + +#define EXYNOS5440_TMU_DATA \ + .trigger_levels[0] = 100, \ + .trigger_levels[4] = 105, \ + .trigger_enable[0] = 1, \ + .trigger_type[0] = SW_TRIP, \ + .trigger_type[4] = HW_TRIP, \ + .max_trigger_level = 5, \ + .gain = 5, \ + .reference_voltage = 16, \ + .noise_cancel_mode = 4, \ + .cal_type = TYPE_ONE_POINT_TRIMMING, \ + .cal_mode = 0, \ + .efuse_value = 0x5b2d, \ + .min_efuse_value = 16, \ + .max_efuse_value = 76, \ + .first_point_trim = 25, \ + .second_point_trim = 70, \ + .default_temp_offset = 25, \ + .type = SOC_ARCH_EXYNOS5440, \ + .registers = 
&exynos5440_tmu_registers, \ + .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \ + TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_SHARED_MEMORY), + +struct exynos_tmu_init_data const exynos5440_default_tmu_data = { + .tmu_data = { + { EXYNOS5440_TMU_DATA } , + { EXYNOS5440_TMU_DATA } , + { EXYNOS5440_TMU_DATA } , + }, + .tmu_count = 3, +}; +#endif diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h new file mode 100644 index 000000000000..dc7feb51099b --- /dev/null +++ b/drivers/thermal/samsung/exynos_tmu_data.h @@ -0,0 +1,155 @@ +/* + * exynos_tmu_data.h - Samsung EXYNOS tmu data header file + * + * Copyright (C) 2013 Samsung Electronics + * Amit Daniel Kachhap <amit.daniel@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _EXYNOS_TMU_DATA_H +#define _EXYNOS_TMU_DATA_H + +/* Exynos generic registers */ +#define EXYNOS_TMU_REG_TRIMINFO 0x0 +#define EXYNOS_TMU_REG_CONTROL 0x20 +#define EXYNOS_TMU_REG_STATUS 0x28 +#define EXYNOS_TMU_REG_CURRENT_TEMP 0x40 +#define EXYNOS_TMU_REG_INTEN 0x70 +#define EXYNOS_TMU_REG_INTSTAT 0x74 +#define EXYNOS_TMU_REG_INTCLEAR 0x78 + +#define EXYNOS_TMU_TEMP_MASK 0xff +#define EXYNOS_TMU_REF_VOLTAGE_SHIFT 24 +#define EXYNOS_TMU_REF_VOLTAGE_MASK 0x1f +#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK 0xf +#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8 +#define EXYNOS_TMU_CORE_EN_SHIFT 0 + +/* Exynos4210 specific registers */ +#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP 0x44 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58 +#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C +#define EXYNOS4210_TMU_REG_PAST_TEMP0 0x60 +#define EXYNOS4210_TMU_REG_PAST_TEMP1 0x64 +#define EXYNOS4210_TMU_REG_PAST_TEMP2 0x68 +#define EXYNOS4210_TMU_REG_PAST_TEMP3 0x6C + +#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK 0x1 +#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK 0x10 +#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK 0x100 +#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK 0x1000 +#define EXYNOS4210_TMU_TRIG_LEVEL_MASK 0x1111 +#define EXYNOS4210_TMU_INTCLEAR_VAL 0x1111 + +/* Exynos5250 and Exynos4412 specific registers */ +#define EXYNOS_TMU_TRIMINFO_CON 0x14 +#define EXYNOS_THD_TEMP_RISE 0x50 +#define EXYNOS_THD_TEMP_FALL 0x54 +#define EXYNOS_EMUL_CON 0x80 + +#define EXYNOS_TRIMINFO_RELOAD_SHIFT 1 +#define EXYNOS_TRIMINFO_25_SHIFT 0 +#define EXYNOS_TRIMINFO_85_SHIFT 8 +#define EXYNOS_TMU_RISE_INT_MASK 0x111 +#define EXYNOS_TMU_RISE_INT_SHIFT 0 +#define EXYNOS_TMU_FALL_INT_MASK 0x111 +#define EXYNOS_TMU_FALL_INT_SHIFT 
12 +#define EXYNOS_TMU_CLEAR_RISE_INT 0x111 +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) +#define EXYNOS_TMU_TRIP_MODE_SHIFT 13 +#define EXYNOS_TMU_TRIP_MODE_MASK 0x7 +#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12 +#define EXYNOS_TMU_CALIB_MODE_SHIFT 4 +#define EXYNOS_TMU_CALIB_MODE_MASK 0x3 + +#define EXYNOS_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS_TMU_INTEN_RISE1_SHIFT 4 +#define EXYNOS_TMU_INTEN_RISE2_SHIFT 8 +#define EXYNOS_TMU_INTEN_RISE3_SHIFT 12 +#define EXYNOS_TMU_INTEN_FALL0_SHIFT 16 +#define EXYNOS_TMU_INTEN_FALL1_SHIFT 20 +#define EXYNOS_TMU_INTEN_FALL2_SHIFT 24 + +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_MASK 0xffff +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 + +#define EXYNOS_MAX_TRIGGER_PER_REG 4 + +/*exynos5440 specific registers*/ +#define EXYNOS5440_TMU_S0_7_TRIM 0x000 +#define EXYNOS5440_TMU_S0_7_CTRL 0x020 +#define EXYNOS5440_TMU_S0_7_DEBUG 0x040 +#define EXYNOS5440_TMU_S0_7_STATUS 0x060 +#define EXYNOS5440_TMU_S0_7_TEMP 0x0f0 +#define EXYNOS5440_TMU_S0_7_TH0 0x110 +#define EXYNOS5440_TMU_S0_7_TH1 0x130 +#define EXYNOS5440_TMU_S0_7_TH2 0x150 +#define EXYNOS5440_TMU_S0_7_EVTEN 0x1F0 +#define EXYNOS5440_TMU_S0_7_IRQEN 0x210 +#define EXYNOS5440_TMU_S0_7_IRQ 0x230 +/* exynos5440 common registers */ +#define EXYNOS5440_TMU_IRQ_STATUS 0x000 +#define EXYNOS5440_TMU_PMIN 0x004 +#define EXYNOS5440_TMU_TEMP 0x008 + +#define EXYNOS5440_TMU_RISE_INT_MASK 0xf +#define EXYNOS5440_TMU_RISE_INT_SHIFT 0 +#define EXYNOS5440_TMU_FALL_INT_MASK 0xf +#define EXYNOS5440_TMU_FALL_INT_SHIFT 4 +#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT 1 +#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT 2 +#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT 3 +#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT 4 +#define EXYNOS5440_TMU_INTEN_FALL1_SHIFT 5 +#define EXYNOS5440_TMU_INTEN_FALL2_SHIFT 6 +#define EXYNOS5440_TMU_INTEN_FALL3_SHIFT 7 +#define 
EXYNOS5440_TMU_TH_RISE0_SHIFT 0 +#define EXYNOS5440_TMU_TH_RISE1_SHIFT 8 +#define EXYNOS5440_TMU_TH_RISE2_SHIFT 16 +#define EXYNOS5440_TMU_TH_RISE3_SHIFT 24 +#define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 +#define EXYNOS5440_EFUSE_SWAP_OFFSET 8 + +#if defined(CONFIG_CPU_EXYNOS4210) +extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; +#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) +#else +#define EXYNOS4210_TMU_DRV_DATA (NULL) +#endif + +#if (defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)) +extern struct exynos_tmu_init_data const exynos5250_default_tmu_data; +#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data) +#else +#define EXYNOS5250_TMU_DRV_DATA (NULL) +#endif + +#if defined(CONFIG_SOC_EXYNOS5440) +extern struct exynos_tmu_init_data const exynos5440_default_tmu_data; +#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data) +#else +#define EXYNOS5440_TMU_DRV_DATA (NULL) +#endif + +#endif /*_EXYNOS_TMU_DATA_H*/ diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 4d4ddae1a991..d89e781b0a18 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -51,44 +51,51 @@ static unsigned long get_target_state(struct thermal_instance *instance, { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; + unsigned long next_target; + /* + * We keep this instance the way it is by default. + * Otherwise, we use the current state of the + * cdev in use to determine the next_target. + */ cdev->ops->get_cur_state(cdev, &cur_state); + next_target = instance->target; switch (trend) { case THERMAL_TREND_RAISING: if (throttle) { - cur_state = cur_state < instance->upper ? + next_target = cur_state < instance->upper ? 
(cur_state + 1) : instance->upper; - if (cur_state < instance->lower) - cur_state = instance->lower; + if (next_target < instance->lower) + next_target = instance->lower; } break; case THERMAL_TREND_RAISE_FULL: if (throttle) - cur_state = instance->upper; + next_target = instance->upper; break; case THERMAL_TREND_DROPPING: if (cur_state == instance->lower) { if (!throttle) - cur_state = -1; + next_target = THERMAL_NO_TARGET; } else { - cur_state -= 1; - if (cur_state > instance->upper) - cur_state = instance->upper; + next_target = cur_state - 1; + if (next_target > instance->upper) + next_target = instance->upper; } break; case THERMAL_TREND_DROP_FULL: if (cur_state == instance->lower) { if (!throttle) - cur_state = -1; + next_target = THERMAL_NO_TARGET; } else - cur_state = instance->lower; + next_target = instance->lower; break; default: break; } - return cur_state; + return next_target; } static void update_passive_instance(struct thermal_zone_device *tz, @@ -133,6 +140,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) old_target = instance->target; instance->target = get_target_state(instance, trend, throttle); + if (old_target == instance->target) + continue; + /* Activate a passive thermal instance */ if (old_target == THERMAL_NO_TARGET && instance->target != THERMAL_NO_TARGET) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 1f02e8edb45c..4962a6aaf295 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -38,6 +38,7 @@ #include <net/genetlink.h> #include "thermal_core.h" +#include "thermal_hwmon.h" MODULE_AUTHOR("Zhang Rui"); MODULE_DESCRIPTION("Generic thermal management sysfs support"); @@ -201,14 +202,23 @@ static void print_bind_err_msg(struct thermal_zone_device *tz, } static void __bind(struct thermal_zone_device *tz, int mask, - struct thermal_cooling_device *cdev) + struct thermal_cooling_device *cdev, + unsigned long *limits) { int i, ret; for (i = 0; 
i < tz->trips; i++) { if (mask & (1 << i)) { + unsigned long upper, lower; + + upper = THERMAL_NO_LIMIT; + lower = THERMAL_NO_LIMIT; + if (limits) { + lower = limits[i * 2]; + upper = limits[i * 2 + 1]; + } ret = thermal_zone_bind_cooling_device(tz, i, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT); + upper, lower); if (ret) print_bind_err_msg(tz, cdev, ret); } @@ -253,7 +263,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev) if (tzp->tbp[i].match(pos, cdev)) continue; tzp->tbp[i].cdev = cdev; - __bind(pos, tzp->tbp[i].trip_mask, cdev); + __bind(pos, tzp->tbp[i].trip_mask, cdev, + tzp->tbp[i].binding_limits); } } @@ -291,7 +302,8 @@ static void bind_tz(struct thermal_zone_device *tz) if (tzp->tbp[i].match(tz, pos)) continue; tzp->tbp[i].cdev = pos; - __bind(tz, tzp->tbp[i].trip_mask, pos); + __bind(tz, tzp->tbp[i].trip_mask, pos, + tzp->tbp[i].binding_limits); } } exit: @@ -859,260 +871,6 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* Device management */ -#if defined(CONFIG_THERMAL_HWMON) - -/* hwmon sys I/F */ -#include <linux/hwmon.h> - -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; - -/* one temperature input for each thermal zone */ -struct thermal_hwmon_temp { - struct list_head hwmon_node; - struct thermal_zone_device *tz; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -}; - -static LIST_HEAD(thermal_hwmon_list); - -static ssize_t -name_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hwmon->type); -} -static DEVICE_ATTR(name, 0444, name_show, NULL); - -static ssize_t 
-temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - long temperature; - int ret; - struct thermal_hwmon_attr *hwmon_attr - = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, - temp_input); - struct thermal_zone_device *tz = temp->tz; - - ret = thermal_zone_get_temp(tz, &temperature); - - if (ret) - return ret; - - return sprintf(buf, "%ld\n", temperature); -} - -static ssize_t -temp_crit_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct thermal_hwmon_attr *hwmon_attr - = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_hwmon_temp *temp - = container_of(hwmon_attr, struct thermal_hwmon_temp, - temp_crit); - struct thermal_zone_device *tz = temp->tz; - long temperature; - int ret; - - ret = tz->ops->get_trip_temp(tz, 0, &temperature); - if (ret) - return ret; - - return sprintf(buf, "%ld\n", temperature); -} - - -static struct thermal_hwmon_device * -thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - - mutex_lock(&thermal_list_lock); - list_for_each_entry(hwmon, &thermal_hwmon_list, node) - if (!strcmp(hwmon->type, tz->type)) { - mutex_unlock(&thermal_list_lock); - return hwmon; - } - mutex_unlock(&thermal_list_lock); - - return NULL; -} - -/* Find the temperature input matching a given thermal zone */ -static struct thermal_hwmon_temp * -thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, - const struct thermal_zone_device *tz) -{ - struct thermal_hwmon_temp *temp; - - mutex_lock(&thermal_list_lock); - list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) - if (temp->tz == tz) { - mutex_unlock(&thermal_list_lock); - return temp; - } - mutex_unlock(&thermal_list_lock); - - return NULL; -} - -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - struct 
thermal_hwmon_temp *temp; - int new_hwmon_device = 1; - int result; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (hwmon) { - new_hwmon_device = 0; - goto register_sys_interface; - } - - hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); - if (!hwmon) - return -ENOMEM; - - INIT_LIST_HEAD(&hwmon->tz_list); - strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); - hwmon->device = hwmon_device_register(NULL); - if (IS_ERR(hwmon->device)) { - result = PTR_ERR(hwmon->device); - goto free_mem; - } - dev_set_drvdata(hwmon->device, hwmon); - result = device_create_file(hwmon->device, &dev_attr_name); - if (result) - goto free_mem; - - register_sys_interface: - temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); - if (!temp) { - result = -ENOMEM; - goto unregister_name; - } - - temp->tz = tz; - hwmon->count++; - - snprintf(temp->temp_input.name, sizeof(temp->temp_input.name), - "temp%d_input", hwmon->count); - temp->temp_input.attr.attr.name = temp->temp_input.name; - temp->temp_input.attr.attr.mode = 0444; - temp->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&temp->temp_input.attr.attr); - result = device_create_file(hwmon->device, &temp->temp_input.attr); - if (result) - goto free_temp_mem; - - if (tz->ops->get_crit_temp) { - unsigned long temperature; - if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(temp->temp_crit.name, - sizeof(temp->temp_crit.name), - "temp%d_crit", hwmon->count); - temp->temp_crit.attr.attr.name = temp->temp_crit.name; - temp->temp_crit.attr.attr.mode = 0444; - temp->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&temp->temp_crit.attr.attr); - result = device_create_file(hwmon->device, - &temp->temp_crit.attr); - if (result) - goto unregister_input; - } - } - - mutex_lock(&thermal_list_lock); - if (new_hwmon_device) - list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&temp->hwmon_node, &hwmon->tz_list); - mutex_unlock(&thermal_list_lock); - - return 0; - - 
unregister_input: - device_remove_file(hwmon->device, &temp->temp_input.attr); - free_temp_mem: - kfree(temp); - unregister_name: - if (new_hwmon_device) { - device_remove_file(hwmon->device, &dev_attr_name); - hwmon_device_unregister(hwmon->device); - } - free_mem: - if (new_hwmon_device) - kfree(hwmon); - - return result; -} - -static void -thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) -{ - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_temp *temp; - - hwmon = thermal_hwmon_lookup_by_type(tz); - if (unlikely(!hwmon)) { - /* Should never happen... */ - dev_dbg(&tz->device, "hwmon device lookup failed!\n"); - return; - } - - temp = thermal_hwmon_lookup_temp(hwmon, tz); - if (unlikely(!temp)) { - /* Should never happen... */ - dev_dbg(&tz->device, "temperature input lookup failed!\n"); - return; - } - - device_remove_file(hwmon->device, &temp->temp_input.attr); - if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &temp->temp_crit.attr); - - mutex_lock(&thermal_list_lock); - list_del(&temp->hwmon_node); - kfree(temp); - if (!list_empty(&hwmon->tz_list)) { - mutex_unlock(&thermal_list_lock); - return; - } - list_del(&hwmon->node); - mutex_unlock(&thermal_list_lock); - - device_remove_file(hwmon->device, &dev_attr_name); - hwmon_device_unregister(hwmon->device); - kfree(hwmon); -} -#else -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) -{ - return 0; -} - -static void -thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) -{ -} -#endif - /** * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone * @tz: pointer to struct thermal_zone_device @@ -1715,9 +1473,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, mutex_unlock(&thermal_governor_lock); - result = thermal_add_hwmon_sysfs(tz); - if (result) - goto unregister; + if (!tz->tzp || !tz->tzp->no_hwmon) { + result = thermal_add_hwmon_sysfs(tz); + if (result) + goto unregister; + } 
mutex_lock(&thermal_list_lock); list_add_tail(&tz->node, &thermal_tz_list); diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c new file mode 100644 index 000000000000..eeef0e2498ca --- /dev/null +++ b/drivers/thermal/thermal_hwmon.c @@ -0,0 +1,269 @@ +/* + * thermal_hwmon.c - Generic Thermal Management hwmon support. + * + * Code based on Intel thermal_core.c. Copyrights of the original code: + * Copyright (C) 2008 Intel Corp + * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> + * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> + * + * Copyright (C) 2013 Texas Instruments + * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <linux/hwmon.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/err.h> +#include "thermal_hwmon.h" + +/* hwmon sys I/F */ +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + +static LIST_HEAD(thermal_hwmon_list); + +static DEFINE_MUTEX(thermal_hwmon_list_lock); + +static ssize_t +name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", hwmon->type); +} +static DEVICE_ATTR(name, 0444, name_show, NULL); + +static ssize_t +temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + long temperature; + int ret; + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, + temp_input); + struct thermal_zone_device *tz = temp->tz; + + ret = thermal_zone_get_temp(tz, &temperature); + + if (ret) + return ret; + + return sprintf(buf, "%ld\n", temperature); +} + +static ssize_t +temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct thermal_hwmon_attr *hwmon_attr + = container_of(attr, struct thermal_hwmon_attr, attr); + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, + temp_crit); + struct 
thermal_zone_device *tz = temp->tz; + long temperature; + int ret; + + ret = tz->ops->get_trip_temp(tz, 0, &temperature); + if (ret) + return ret; + + return sprintf(buf, "%ld\n", temperature); +} + + +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + + mutex_lock(&thermal_hwmon_list_lock); + list_for_each_entry(hwmon, &thermal_hwmon_list, node) + if (!strcmp(hwmon->type, tz->type)) { + mutex_unlock(&thermal_hwmon_list_lock); + return hwmon; + } + mutex_unlock(&thermal_hwmon_list_lock); + + return NULL; +} + +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_hwmon_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_hwmon_list_lock); + return temp; + } + mutex_unlock(&thermal_hwmon_list_lock); + + return NULL; +} + +int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); + if (!hwmon) + return -ENOMEM; + + INIT_LIST_HEAD(&hwmon->tz_list); + strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH); + hwmon->device = hwmon_device_register(&tz->device); + if (IS_ERR(hwmon->device)) { + result = PTR_ERR(hwmon->device); + goto free_mem; + } + dev_set_drvdata(hwmon->device, hwmon); + result = device_create_file(hwmon->device, &dev_attr_name); + if (result) + goto free_mem; + + register_sys_interface: + temp = kzalloc(sizeof(*temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + 
temp->tz = tz; + hwmon->count++; + + snprintf(temp->temp_input.name, sizeof(temp->temp_input.name), + "temp%d_input", hwmon->count); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); + if (result) + goto free_temp_mem; + + if (tz->ops->get_crit_temp) { + unsigned long temperature; + if (!tz->ops->get_crit_temp(tz, &temperature)) { + snprintf(temp->temp_crit.name, + sizeof(temp->temp_crit.name), + "temp%d_crit", hwmon->count); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); + result = device_create_file(hwmon->device, + &temp->temp_crit.attr); + if (result) + goto unregister_input; + } + } + + mutex_lock(&thermal_hwmon_list_lock); + if (new_hwmon_device) + list_add_tail(&hwmon->node, &thermal_hwmon_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); + mutex_unlock(&thermal_hwmon_list_lock); + + return 0; + + unregister_input: + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); + unregister_name: + if (new_hwmon_device) { + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + } + free_mem: + if (new_hwmon_device) + kfree(hwmon); + + return result; +} + +void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... 
*/ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } + + device_remove_file(hwmon->device, &temp->temp_input.attr); + if (tz->ops->get_crit_temp) + device_remove_file(hwmon->device, &temp->temp_crit.attr); + + mutex_lock(&thermal_hwmon_list_lock); + list_del(&temp->hwmon_node); + kfree(temp); + if (!list_empty(&hwmon->tz_list)) { + mutex_unlock(&thermal_hwmon_list_lock); + return; + } + list_del(&hwmon->node); + mutex_unlock(&thermal_hwmon_list_lock); + + device_remove_file(hwmon->device, &dev_attr_name); + hwmon_device_unregister(hwmon->device); + kfree(hwmon); +} diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h new file mode 100644 index 000000000000..c798fdb2ae43 --- /dev/null +++ b/drivers/thermal/thermal_hwmon.h @@ -0,0 +1,49 @@ +/* + * thermal_hwmon.h - Generic Thermal Management hwmon support. + * + * Code based on Intel thermal_core.c. Copyrights of the original code: + * Copyright (C) 2008 Intel Corp + * Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com> + * Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com> + * + * Copyright (C) 2013 Texas Instruments + * Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef __THERMAL_HWMON_H__ +#define __THERMAL_HWMON_H__ + +#include <linux/thermal.h> + +#ifdef CONFIG_THERMAL_HWMON +int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz); +void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz); +#else +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + return 0; +} + +static void +thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) +{ +} +#endif + +#endif /* __THERMAL_HWMON_H__ */ diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c index e5d8326a54d6..a4929272074f 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -42,6 +42,7 @@ dra752_core_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_CORE_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_CORE_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_CORE_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_CORE_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_CORE_MASK, @@ -77,6 +78,7 @@ dra752_iva_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_IVA_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_IVA_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_IVA_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_IVA_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_IVA_MASK, @@ -112,6 +114,7 @@ dra752_mpu_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_MPU_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_MPU_MASK, 
.mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_MPU_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_MPU_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_MPU_MASK, @@ -147,6 +150,7 @@ dra752_dspeve_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_DSPEVE_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_DSPEVE_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_DSPEVE_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_DSPEVE_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_DSPEVE_MASK, @@ -182,6 +186,7 @@ dra752_gpu_temp_sensor_registers = { .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_GPU_MASK, .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_GPU_MASK, .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK, + .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK, .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_GPU_MASK, .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_GPU_MASK, .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_GPU_MASK, diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 9dfd47196e63..74c0e3474d6e 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -1020,9 +1020,13 @@ int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend) /* Fetch the update interval */ ret = ti_bandgap_read_update_interval(bgp, id, &interval); - if (ret || !interval) + if (ret) goto unfreeze; + /* Set the interval to 1 ms if bandgap counter delay is not set */ + if (interval == 0) + interval = 1; + *trend = (t1 - t2) / interval; dev_dbg(bgp->dev, "The temperatures are t1 = %d and t2 = %d and 
trend =%d\n", diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 4c5f55c37349..4f8b9af54a5a 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -174,6 +174,9 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal, enum thermal_device_mode mode) { struct ti_thermal_data *data = thermal->devdata; + struct ti_bandgap *bgp; + + bgp = data->bgp; if (!data->ti_thermal) { dev_notice(&thermal->device, "thermal zone not registered\n"); @@ -190,6 +193,8 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal, mutex_unlock(&data->ti_thermal->lock); data->mode = mode; + ti_bandgap_write_update_interval(bgp, data->sensor_id, + data->ti_thermal->polling_delay); thermal_zone_device_update(data->ti_thermal); dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n", data->ti_thermal->polling_delay); @@ -313,6 +318,8 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id, } data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE; ti_bandgap_set_sensor_data(bgp, id, data); + ti_bandgap_write_update_interval(bgp, data->sensor_id, + data->ti_thermal->polling_delay); return 0; } diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 47c6e7b9e150..febd45cd5027 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -5,7 +5,7 @@ if TTY menu "Serial drivers" - depends on HAS_IOMEM && GENERIC_HARDIRQS + depends on HAS_IOMEM source "drivers/tty/serial/8250/Kconfig" diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a9355ce1c6d5..3a1a01af9a80 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -854,7 +854,8 @@ void disassociate_ctty(int on_exit) struct pid *tty_pgrp = tty_get_pgrp(tty); if (tty_pgrp) { kill_pgrp(tty_pgrp, SIGHUP, on_exit); - kill_pgrp(tty_pgrp, SIGCONT, on_exit); + if (!on_exit) + kill_pgrp(tty_pgrp, SIGCONT, 
on_exit); put_pid(tty_pgrp); } } diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index f969ea266acb..b870872e020f 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -1,6 +1,6 @@ config USB_DWC3 tristate "DesignWare USB3 DRD Core Support" - depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA + depends on (USB || USB_GADGET) && HAS_DMA depends on EXTCON select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD help diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 30e2dd8a1f2c..48cddf3cd6b8 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -313,7 +313,7 @@ config USB_S3C_HSUDC config USB_MV_UDC tristate "Marvell USB2.0 Device Controller" - depends on GENERIC_HARDIRQS && HAS_DMA + depends on HAS_DMA help Marvell Socs (including PXA and MMP series) include a high speed USB2.0 OTG controller, which can be configured as high speed or @@ -425,7 +425,7 @@ config USB_GOKU config USB_EG20T tristate "Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7831) UDC" - depends on PCI && GENERIC_HARDIRQS + depends on PCI help This is a USB device driver for EG20T PCH. 
EG20T PCH is the platform controller hub that is used in Intel's diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 465ef8e2cc91..b94c049ab0d0 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -524,7 +524,7 @@ struct kiocb_priv { unsigned actual; }; -static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) +static int ep_aio_cancel(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; struct ep_data *epdata; @@ -540,7 +540,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) // spin_unlock(&epdata->dev->lock); local_irq_enable(); - aio_put_req(iocb); return value; } @@ -709,11 +708,11 @@ ep_aio_read(struct kiocb *iocb, const struct iovec *iov, if (unlikely(usb_endpoint_dir_in(&epdata->desc))) return -EINVAL; - buf = kmalloc(iocb->ki_left, GFP_KERNEL); + buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; - return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); + return ep_aio_rwtail(iocb, buf, iocb->ki_nbytes, epdata, iov, nr_segs); } static ssize_t @@ -728,7 +727,7 @@ ep_aio_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(!usb_endpoint_dir_in(&epdata->desc))) return -EINVAL; - buf = kmalloc(iocb->ki_left, GFP_KERNEL); + buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 5be0326aae38..b3f20d7f15de 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -278,7 +278,6 @@ endif # USB_EHCI_HCD config USB_OXU210HP_HCD tristate "OXU210HP HCD support" - depends on GENERIC_HARDIRQS ---help--- The OXU210HP is an USB host/OTG/device controller. Enable this option if your board has this chip. If unsure, say N. 
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index c64ee09a7c0e..c258a97ef1b0 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -71,7 +71,6 @@ config USB_MUSB_DA8XX config USB_MUSB_TUSB6010 tristate "TUSB6010" - depends on GENERIC_HARDIRQS config USB_MUSB_OMAP2PLUS tristate "OMAP2430 and onwards" diff --git a/drivers/usb/renesas_usbhs/Kconfig b/drivers/usb/renesas_usbhs/Kconfig index 019bf7e49ee6..1c4195abc108 100644 --- a/drivers/usb/renesas_usbhs/Kconfig +++ b/drivers/usb/renesas_usbhs/Kconfig @@ -4,7 +4,7 @@ config USB_RENESAS_USBHS tristate 'Renesas USBHS controller' - depends on USB_GADGET && GENERIC_HARDIRQS + depends on USB_GADGET default n help Renesas USBHS is a discrete USB host and peripheral controller chip diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0c27c7df1b09..4b79a1f2f901 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,12 +1,12 @@ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * - * (C) Copyright 2010-2012 RisingTide Systems LLC. + * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * - * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Authors: Nicholas A. 
Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -48,12 +48,16 @@ #include <linux/virtio_scsi.h> #include <linux/llist.h> #include <linux/bitmap.h> +#include <linux/percpu_ida.h> #include "vhost.h" #define TCM_VHOST_VERSION "v0.1" #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +#define TCM_VHOST_DEFAULT_TAGS 256 +#define TCM_VHOST_PREALLOC_SGLS 2048 +#define TCM_VHOST_PREALLOC_PAGES 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -79,6 +83,7 @@ struct tcm_vhost_cmd { u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; /* Pointer to vhost_scsi for our device */ @@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); + struct se_session *se_sess = se_cmd->se_sess; if (tv_cmd->tvc_sgl_count) { u32 i; for (i = 0; i < tv_cmd->tvc_sgl_count; i++) put_page(sg_page(&tv_cmd->tvc_sgl[i])); - - kfree(tv_cmd->tvc_sgl); } tcm_vhost_put_inflight(tv_cmd->inflight); - kfree(tv_cmd); + percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd * -vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, +vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg, struct virtio_scsi_cmd_req *v_req, u32 exp_data_len, @@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, { struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; + struct se_session *se_sess; + struct scatterlist *sg; + struct page **pages; + int tag; tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { 
pr_err("Unable to locate active struct tcm_vhost_nexus\n"); return ERR_PTR(-EIO); } + se_sess = tv_nexus->tvn_se_sess; - cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC); - if (!cmd) { - pr_err("Unable to allocate struct tcm_vhost_cmd\n"); - return ERR_PTR(-ENOMEM); - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL); + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; + sg = cmd->tvc_sgl; + pages = cmd->tvc_upages; + memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); + + cmd->tvc_sgl = sg; + cmd->tvc_upages = pages; + cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = v_req->tag; cmd->tvc_task_attr = v_req->task_attr; cmd->tvc_exp_data_len = exp_data_len; @@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, * Returns the number of scatterlist entries used or -errno on error. */ static int -vhost_scsi_map_to_sgl(struct scatterlist *sgl, +vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, + struct scatterlist *sgl, unsigned int sgl_count, struct iovec *iov, int write) @@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, struct page **pages; int ret, i; + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } + pages_nr = iov_num_pages(iov); if (pages_nr > sgl_count) return -ENOBUFS; - pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_PAGES); + return -ENOBUFS; + } + + pages = tv_cmd->tvc_upages; ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ @@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, } out: - kfree(pages); return ret; } @@ -807,24 +832,20 @@ 
vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, /* TODO overflow checking */ - sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC); - if (!sg) - return -ENOMEM; - pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__, - sg, sgl_count, !sg); + sg = cmd->tvc_sgl; + pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - cmd->tvc_sgl = sg; cmd->tvc_sgl_count = sgl_count; pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); for (i = 0; i < niov; i++) { - ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write); + ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], + write); if (ret < 0) { for (i = 0; i < cmd->tvc_sgl_count; i++) put_page(sg_page(&cmd->tvc_sgl[i])); - kfree(cmd->tvc_sgl); - cmd->tvc_sgl = NULL; + cmd->tvc_sgl_count = 0; return ret; } @@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req, - exp_data_len, data_direction); + cmd = vhost_scsi_get_tag(vq, tpg, &v_req, + exp_data_len, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", + vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); goto err_cmd; } @@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl) kfree(nacl); } +static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, + struct se_session *se_sess) +{ + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; + + if (!se_sess->sess_cmd_map) + return; + + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_upages); + } +} + static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, const char *name) { struct se_portal_group *se_tpg; + struct se_session *se_sess; struct tcm_vhost_nexus *tv_nexus; + struct tcm_vhost_cmd *tv_cmd; + 
unsigned int i; mutex_lock(&tpg->tv_tpg_mutex); if (tpg->tpg_nexus) { @@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer and setup tagpool + * for struct tcm_vhost_cmd descriptors */ - tv_nexus->tvn_se_sess = transport_init_session(); + tv_nexus->tvn_se_sess = transport_init_session_tags( + TCM_VHOST_DEFAULT_TAGS, + sizeof(struct tcm_vhost_cmd)); if (IS_ERR(tv_nexus->tvn_se_sess)) { mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); return -ENOMEM; } + se_sess = tv_nexus->tvn_se_sess; + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * + TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + } /* * Since we are running in 'demo mode' this call with generate a * struct se_node_acl for the tcm_vhost struct se_portal_group with @@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); pr_debug("core_tpg_check_initiator_node_acl() failed" " for %s\n", name); - transport_free_session(tv_nexus->tvn_se_sess); - kfree(tv_nexus); - return -ENOMEM; + goto out; } /* * Now register the TCM vhost virtual I_T Nexus as active with the @@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); return 0; + +out: + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); + transport_free_session(se_sess); + kfree(tv_nexus); + return -ENOMEM; } static int tcm_vhost_drop_nexus(struct 
tcm_vhost_tpg *tpg) @@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); + + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig index 2bd1257dcc1c..efc7f075fcbe 100644 --- a/drivers/w1/masters/Kconfig +++ b/drivers/w1/masters/Kconfig @@ -42,7 +42,7 @@ config W1_MASTER_MXC config W1_MASTER_DS1WM tristate "Maxim DS1WM 1-wire busmaster" - depends on W1 && GENERIC_HARDIRQS + depends on W1 help Say Y here to enable the DS1WM 1-wire driver, such as that in HP iPAQ devices like h5xxx, h2200, and ASIC3-based like diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 362085d7ad8f..d1d53f301de7 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -290,6 +290,16 @@ config ORION_WATCHDOG To compile this driver as a module, choose M here: the module will be called orion_wdt. +config SUNXI_WATCHDOG + tristate "Allwinner SoCs watchdog support" + depends on ARCH_SUNXI + select WATCHDOG_CORE + help + Say Y here to include support for the watchdog timer + in Allwinner SoCs. + To compile this driver as a module, choose M here: the + module will be called sunxi_wdt. 
+ config COH901327_WATCHDOG bool "ST-Ericsson COH 901 327 watchdog" depends on ARCH_U300 diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 2f26a0b47ddc..6c5bb274d3cd 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o +obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index 2f3cc8fb471a..b3709f9cf5be 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -280,11 +280,6 @@ static int ar7_wdt_probe(struct platform_device *pdev) ar7_regs_wdt = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!ar7_regs_wdt) { - pr_err("could not get registers resource\n"); - return -ENODEV; - } - ar7_wdt = devm_ioremap_resource(&pdev->dev, ar7_regs_wdt); if (IS_ERR(ar7_wdt)) return PTR_ERR(ar7_wdt); diff --git a/drivers/watchdog/nuc900_wdt.c b/drivers/watchdog/nuc900_wdt.c index e2b6d2cf5c9d..b15b6efd91a1 100644 --- a/drivers/watchdog/nuc900_wdt.c +++ b/drivers/watchdog/nuc900_wdt.c @@ -256,11 +256,6 @@ static int nuc900wdt_probe(struct platform_device *pdev) spin_lock_init(&nuc900_wdt->wdt_lock); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "no memory resource specified\n"); - return -ENOENT; - } - nuc900_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(nuc900_wdt->wdt_base)) return PTR_ERR(nuc900_wdt->wdt_base); diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 6a22cf5d35bd..23aad7c6bf5d 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -84,13 +84,17 @@ 
MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, " "0 to reboot (default 0)"); MODULE_PARM_DESC(debug, "Watchdog debug, set to >1 for debug (default 0)"); -static struct device *wdt_dev; /* platform device attached to */ -static struct resource *wdt_mem; -static struct resource *wdt_irq; -static struct clk *wdt_clock; -static void __iomem *wdt_base; -static unsigned int wdt_count; -static DEFINE_SPINLOCK(wdt_lock); +struct s3c2410_wdt { + struct device *dev; + struct clk *clock; + void __iomem *reg_base; + unsigned int count; + spinlock_t lock; + unsigned long wtcon_save; + unsigned long wtdat_save; + struct watchdog_device wdt_device; + struct notifier_block freq_transition; +}; /* watchdog control routines */ @@ -102,29 +106,38 @@ do { \ /* functions */ +static inline struct s3c2410_wdt *freq_to_wdt(struct notifier_block *nb) +{ + return container_of(nb, struct s3c2410_wdt, freq_transition); +} + static int s3c2410wdt_keepalive(struct watchdog_device *wdd) { - spin_lock(&wdt_lock); - writel(wdt_count, wdt_base + S3C2410_WTCNT); - spin_unlock(&wdt_lock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + + spin_lock(&wdt->lock); + writel(wdt->count, wdt->reg_base + S3C2410_WTCNT); + spin_unlock(&wdt->lock); return 0; } -static void __s3c2410wdt_stop(void) +static void __s3c2410wdt_stop(struct s3c2410_wdt *wdt) { unsigned long wtcon; - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon &= ~(S3C2410_WTCON_ENABLE | S3C2410_WTCON_RSTEN); - writel(wtcon, wdt_base + S3C2410_WTCON); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); } static int s3c2410wdt_stop(struct watchdog_device *wdd) { - spin_lock(&wdt_lock); - __s3c2410wdt_stop(); - spin_unlock(&wdt_lock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + + spin_lock(&wdt->lock); + __s3c2410wdt_stop(wdt); + spin_unlock(&wdt->lock); return 0; } @@ -132,12 +145,13 @@ static int s3c2410wdt_stop(struct watchdog_device *wdd) static int 
s3c2410wdt_start(struct watchdog_device *wdd) { unsigned long wtcon; + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); - spin_lock(&wdt_lock); + spin_lock(&wdt->lock); - __s3c2410wdt_stop(); + __s3c2410wdt_stop(wdt); - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon |= S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV128; if (soft_noboot) { @@ -148,25 +162,26 @@ static int s3c2410wdt_start(struct watchdog_device *wdd) wtcon |= S3C2410_WTCON_RSTEN; } - DBG("%s: wdt_count=0x%08x, wtcon=%08lx\n", - __func__, wdt_count, wtcon); + DBG("%s: count=0x%08x, wtcon=%08lx\n", + __func__, wdt->count, wtcon); - writel(wdt_count, wdt_base + S3C2410_WTDAT); - writel(wdt_count, wdt_base + S3C2410_WTCNT); - writel(wtcon, wdt_base + S3C2410_WTCON); - spin_unlock(&wdt_lock); + writel(wdt->count, wdt->reg_base + S3C2410_WTDAT); + writel(wdt->count, wdt->reg_base + S3C2410_WTCNT); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); + spin_unlock(&wdt->lock); return 0; } -static inline int s3c2410wdt_is_running(void) +static inline int s3c2410wdt_is_running(struct s3c2410_wdt *wdt) { - return readl(wdt_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE; + return readl(wdt->reg_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE; } static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeout) { - unsigned long freq = clk_get_rate(wdt_clock); + struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd); + unsigned long freq = clk_get_rate(wdt->clock); unsigned int count; unsigned int divisor = 1; unsigned long wtcon; @@ -192,7 +207,7 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou } if ((count / divisor) >= 0x10000) { - dev_err(wdt_dev, "timeout %d too big\n", timeout); + dev_err(wdt->dev, "timeout %d too big\n", timeout); return -EINVAL; } } @@ -201,15 +216,15 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou __func__, timeout, divisor, count, count/divisor); count /= divisor; - 
wdt_count = count; + wdt->count = count; /* update the pre-scaler */ - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); wtcon &= ~S3C2410_WTCON_PRESCALE_MASK; wtcon |= S3C2410_WTCON_PRESCALE(divisor-1); - writel(count, wdt_base + S3C2410_WTDAT); - writel(wtcon, wdt_base + S3C2410_WTCON); + writel(count, wdt->reg_base + S3C2410_WTDAT); + writel(wtcon, wdt->reg_base + S3C2410_WTCON); wdd->timeout = (count * divisor) / freq; @@ -242,21 +257,23 @@ static struct watchdog_device s3c2410_wdd = { static irqreturn_t s3c2410wdt_irq(int irqno, void *param) { - dev_info(wdt_dev, "watchdog timer expired (irq)\n"); + struct s3c2410_wdt *wdt = platform_get_drvdata(param); + + dev_info(wdt->dev, "watchdog timer expired (irq)\n"); - s3c2410wdt_keepalive(&s3c2410_wdd); + s3c2410wdt_keepalive(&wdt->wdt_device); return IRQ_HANDLED; } - #ifdef CONFIG_CPU_FREQ static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb, unsigned long val, void *data) { int ret; + struct s3c2410_wdt *wdt = freq_to_wdt(nb); - if (!s3c2410wdt_is_running()) + if (!s3c2410wdt_is_running(wdt)) goto done; if (val == CPUFREQ_PRECHANGE) { @@ -265,14 +282,15 @@ static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb, * the watchdog is running. 
*/ - s3c2410wdt_keepalive(&s3c2410_wdd); + s3c2410wdt_keepalive(&wdt->wdt_device); } else if (val == CPUFREQ_POSTCHANGE) { - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); - ret = s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout); + ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, + wdt->wdt_device.timeout); if (ret >= 0) - s3c2410wdt_start(&s3c2410_wdd); + s3c2410wdt_start(&wdt->wdt_device); else goto err; } @@ -281,34 +299,35 @@ done: return 0; err: - dev_err(wdt_dev, "cannot set new value for timeout %d\n", - s3c2410_wdd.timeout); + dev_err(wdt->dev, "cannot set new value for timeout %d\n", + wdt->wdt_device.timeout); return ret; } -static struct notifier_block s3c2410wdt_cpufreq_transition_nb = { - .notifier_call = s3c2410wdt_cpufreq_transition, -}; - -static inline int s3c2410wdt_cpufreq_register(void) +static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt) { - return cpufreq_register_notifier(&s3c2410wdt_cpufreq_transition_nb, + wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition; + + return cpufreq_register_notifier(&wdt->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); } -static inline void s3c2410wdt_cpufreq_deregister(void) +static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt) { - cpufreq_unregister_notifier(&s3c2410wdt_cpufreq_transition_nb, + wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition; + + cpufreq_unregister_notifier(&wdt->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); } #else -static inline int s3c2410wdt_cpufreq_register(void) + +static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt) { return 0; } -static inline void s3c2410wdt_cpufreq_deregister(void) +static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt) { } #endif @@ -316,6 +335,9 @@ static inline void s3c2410wdt_cpufreq_deregister(void) static int s3c2410wdt_probe(struct platform_device *pdev) { struct device *dev; + struct s3c2410_wdt *wdt; + struct resource 
*wdt_mem; + struct resource *wdt_irq; unsigned int wtcon; int started = 0; int ret; @@ -323,13 +345,14 @@ static int s3c2410wdt_probe(struct platform_device *pdev) DBG("%s: probe=%p\n", __func__, pdev); dev = &pdev->dev; - wdt_dev = &pdev->dev; - wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (wdt_mem == NULL) { - dev_err(dev, "no memory resource specified\n"); - return -ENOENT; - } + wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); + if (!wdt) + return -ENOMEM; + + wdt->dev = &pdev->dev; + spin_lock_init(&wdt->lock); + wdt->wdt_device = s3c2410_wdd; wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (wdt_irq == NULL) { @@ -339,35 +362,40 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } /* get the memory region for the watchdog timer */ - wdt_base = devm_ioremap_resource(dev, wdt_mem); - if (IS_ERR(wdt_base)) { - ret = PTR_ERR(wdt_base); + wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + wdt->reg_base = devm_ioremap_resource(dev, wdt_mem); + if (IS_ERR(wdt->reg_base)) { + ret = PTR_ERR(wdt->reg_base); goto err; } - DBG("probe: mapped wdt_base=%p\n", wdt_base); + DBG("probe: mapped reg_base=%p\n", wdt->reg_base); - wdt_clock = devm_clk_get(dev, "watchdog"); - if (IS_ERR(wdt_clock)) { + wdt->clock = devm_clk_get(dev, "watchdog"); + if (IS_ERR(wdt->clock)) { dev_err(dev, "failed to find watchdog clock source\n"); - ret = PTR_ERR(wdt_clock); + ret = PTR_ERR(wdt->clock); goto err; } - clk_prepare_enable(wdt_clock); + clk_prepare_enable(wdt->clock); - ret = s3c2410wdt_cpufreq_register(); + ret = s3c2410wdt_cpufreq_register(wdt); if (ret < 0) { dev_err(dev, "failed to register cpufreq\n"); goto err_clk; } + watchdog_set_drvdata(&wdt->wdt_device, wdt); + /* see if we can actually set the requested timer margin, and if * not, try the default value */ - watchdog_init_timeout(&s3c2410_wdd, tmr_margin, &pdev->dev); - if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) { - started = 
s3c2410wdt_set_heartbeat(&s3c2410_wdd, + watchdog_init_timeout(&wdt->wdt_device, tmr_margin, &pdev->dev); + ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device, + wdt->wdt_device.timeout); + if (ret) { + started = s3c2410wdt_set_heartbeat(&wdt->wdt_device, CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME); if (started == 0) @@ -386,9 +414,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev) goto err_cpufreq; } - watchdog_set_nowayout(&s3c2410_wdd, nowayout); + watchdog_set_nowayout(&wdt->wdt_device, nowayout); - ret = watchdog_register_device(&s3c2410_wdd); + ret = watchdog_register_device(&wdt->wdt_device); if (ret) { dev_err(dev, "cannot register watchdog (%d)\n", ret); goto err_cpufreq; @@ -396,18 +424,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev) if (tmr_atboot && started == 0) { dev_info(dev, "starting watchdog timer\n"); - s3c2410wdt_start(&s3c2410_wdd); + s3c2410wdt_start(&wdt->wdt_device); } else if (!tmr_atboot) { /* if we're not enabling the watchdog, then ensure it is * disabled if it has been left running from the bootloader * or other source */ - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); } + platform_set_drvdata(pdev, wdt); + /* print out a statement of readiness */ - wtcon = readl(wdt_base + S3C2410_WTCON); + wtcon = readl(wdt->reg_base + S3C2410_WTCON); dev_info(dev, "watchdog %sactive, reset %sabled, irq %sabled\n", (wtcon & S3C2410_WTCON_ENABLE) ? 
"" : "in", @@ -417,64 +447,64 @@ static int s3c2410wdt_probe(struct platform_device *pdev) return 0; err_cpufreq: - s3c2410wdt_cpufreq_deregister(); + s3c2410wdt_cpufreq_deregister(wdt); err_clk: - clk_disable_unprepare(wdt_clock); - wdt_clock = NULL; + clk_disable_unprepare(wdt->clock); + wdt->clock = NULL; err: - wdt_irq = NULL; - wdt_mem = NULL; return ret; } static int s3c2410wdt_remove(struct platform_device *dev) { - watchdog_unregister_device(&s3c2410_wdd); + struct s3c2410_wdt *wdt = platform_get_drvdata(dev); - s3c2410wdt_cpufreq_deregister(); + watchdog_unregister_device(&wdt->wdt_device); - clk_disable_unprepare(wdt_clock); - wdt_clock = NULL; + s3c2410wdt_cpufreq_deregister(wdt); + + clk_disable_unprepare(wdt->clock); + wdt->clock = NULL; - wdt_irq = NULL; - wdt_mem = NULL; return 0; } static void s3c2410wdt_shutdown(struct platform_device *dev) { - s3c2410wdt_stop(&s3c2410_wdd); + struct s3c2410_wdt *wdt = platform_get_drvdata(dev); + + s3c2410wdt_stop(&wdt->wdt_device); } #ifdef CONFIG_PM_SLEEP -static unsigned long wtcon_save; -static unsigned long wtdat_save; - static int s3c2410wdt_suspend(struct device *dev) { + struct s3c2410_wdt *wdt = dev_get_drvdata(dev); + /* Save watchdog state, and turn it off. */ - wtcon_save = readl(wdt_base + S3C2410_WTCON); - wtdat_save = readl(wdt_base + S3C2410_WTDAT); + wdt->wtcon_save = readl(wdt->reg_base + S3C2410_WTCON); + wdt->wtdat_save = readl(wdt->reg_base + S3C2410_WTDAT); /* Note that WTCNT doesn't need to be saved. */ - s3c2410wdt_stop(&s3c2410_wdd); + s3c2410wdt_stop(&wdt->wdt_device); return 0; } static int s3c2410wdt_resume(struct device *dev) { - /* Restore watchdog state. */ + struct s3c2410_wdt *wdt = dev_get_drvdata(dev); - writel(wtdat_save, wdt_base + S3C2410_WTDAT); - writel(wtdat_save, wdt_base + S3C2410_WTCNT); /* Reset count */ - writel(wtcon_save, wdt_base + S3C2410_WTCON); + /* Restore watchdog state. 
*/ + writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTDAT); + writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTCNT);/* Reset count */ + writel(wdt->wtcon_save, wdt->reg_base + S3C2410_WTCON); dev_info(dev, "watchdog %sabled\n", - (wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis"); + (wdt->wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis"); return 0; } diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c new file mode 100644 index 000000000000..1f94b42764aa --- /dev/null +++ b/drivers/watchdog/sunxi_wdt.c @@ -0,0 +1,237 @@ +/* + * sunxi Watchdog Driver + * + * Copyright (c) 2013 Carlo Caione + * 2012 Henrik Nordstrom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on xen_wdt.c + * (c) Copyright 2010 Novell, Inc. + */ + +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/types.h> +#include <linux/watchdog.h> + +#define WDT_MAX_TIMEOUT 16 +#define WDT_MIN_TIMEOUT 1 +#define WDT_MODE_TIMEOUT(n) ((n) << 3) +#define WDT_TIMEOUT_MASK WDT_MODE_TIMEOUT(0x0F) + +#define WDT_CTRL 0x00 +#define WDT_CTRL_RELOAD ((1 << 0) | (0x0a57 << 1)) + +#define WDT_MODE 0x04 +#define WDT_MODE_EN (1 << 0) +#define WDT_MODE_RST_EN (1 << 1) + +#define DRV_NAME "sunxi-wdt" +#define DRV_VERSION "1.0" + +static bool nowayout = WATCHDOG_NOWAYOUT; +static unsigned int timeout = WDT_MAX_TIMEOUT; + +struct sunxi_wdt_dev { + struct watchdog_device wdt_dev; + void __iomem *wdt_base; +}; + +/* + * wdt_timeout_map maps the watchdog timer interval value in seconds to + * the value of the register WDT_MODE bit 3:6 + * + * [timeout seconds] = register value + * 
+ */ + +static const int wdt_timeout_map[] = { + [1] = 0b0001, /* 1s */ + [2] = 0b0010, /* 2s */ + [3] = 0b0011, /* 3s */ + [4] = 0b0100, /* 4s */ + [5] = 0b0101, /* 5s */ + [6] = 0b0110, /* 6s */ + [8] = 0b0111, /* 8s */ + [10] = 0b1000, /* 10s */ + [12] = 0b1001, /* 12s */ + [14] = 0b1010, /* 14s */ + [16] = 0b1011, /* 16s */ +}; + +static int sunxi_wdt_ping(struct watchdog_device *wdt_dev) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + + iowrite32(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL); + + return 0; +} + +static int sunxi_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + u32 reg; + + if (wdt_timeout_map[timeout] == 0) + timeout++; + + sunxi_wdt->wdt_dev.timeout = timeout; + + reg = ioread32(wdt_base + WDT_MODE); + reg &= ~WDT_TIMEOUT_MASK; + reg |= WDT_MODE_TIMEOUT(wdt_timeout_map[timeout]); + iowrite32(reg, wdt_base + WDT_MODE); + + sunxi_wdt_ping(wdt_dev); + + return 0; +} + +static int sunxi_wdt_stop(struct watchdog_device *wdt_dev) +{ + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + + iowrite32(0, wdt_base + WDT_MODE); + + return 0; +} + +static int sunxi_wdt_start(struct watchdog_device *wdt_dev) +{ + u32 reg; + struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); + void __iomem *wdt_base = sunxi_wdt->wdt_base; + int ret; + + ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev, + sunxi_wdt->wdt_dev.timeout); + if (ret < 0) + return ret; + + reg = ioread32(wdt_base + WDT_MODE); + reg |= (WDT_MODE_RST_EN | WDT_MODE_EN); + iowrite32(reg, wdt_base + WDT_MODE); + + return 0; +} + +static const struct watchdog_info sunxi_wdt_info = { + .identity = DRV_NAME, + .options = WDIOF_SETTIMEOUT | + WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE, +}; + +static const struct watchdog_ops 
sunxi_wdt_ops = { + .owner = THIS_MODULE, + .start = sunxi_wdt_start, + .stop = sunxi_wdt_stop, + .ping = sunxi_wdt_ping, + .set_timeout = sunxi_wdt_set_timeout, +}; + +static int __init sunxi_wdt_probe(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt; + struct resource *res; + int err; + + sunxi_wdt = devm_kzalloc(&pdev->dev, sizeof(*sunxi_wdt), GFP_KERNEL); + if (!sunxi_wdt) + return -EINVAL; + + platform_set_drvdata(pdev, sunxi_wdt); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(sunxi_wdt->wdt_base)) + return PTR_ERR(sunxi_wdt->wdt_base); + + sunxi_wdt->wdt_dev.info = &sunxi_wdt_info; + sunxi_wdt->wdt_dev.ops = &sunxi_wdt_ops; + sunxi_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT; + sunxi_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT; + sunxi_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT; + sunxi_wdt->wdt_dev.parent = &pdev->dev; + + watchdog_init_timeout(&sunxi_wdt->wdt_dev, timeout, &pdev->dev); + watchdog_set_nowayout(&sunxi_wdt->wdt_dev, nowayout); + + watchdog_set_drvdata(&sunxi_wdt->wdt_dev, sunxi_wdt); + + sunxi_wdt_stop(&sunxi_wdt->wdt_dev); + + err = watchdog_register_device(&sunxi_wdt->wdt_dev); + if (unlikely(err)) + return err; + + dev_info(&pdev->dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)", + sunxi_wdt->wdt_dev.timeout, nowayout); + + return 0; +} + +static int __exit sunxi_wdt_remove(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev); + + watchdog_unregister_device(&sunxi_wdt->wdt_dev); + watchdog_set_drvdata(&sunxi_wdt->wdt_dev, NULL); + + return 0; +} + +static void sunxi_wdt_shutdown(struct platform_device *pdev) +{ + struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev); + + sunxi_wdt_stop(&sunxi_wdt->wdt_dev); +} + +static const struct of_device_id sunxi_wdt_dt_ids[] = { + { .compatible = "allwinner,sun4i-wdt" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids); + +static 
struct platform_driver sunxi_wdt_driver = { + .probe = sunxi_wdt_probe, + .remove = sunxi_wdt_remove, + .shutdown = sunxi_wdt_shutdown, + .driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + .of_match_table = of_match_ptr(sunxi_wdt_dt_ids) + }, +}; + +module_platform_driver(sunxi_wdt_driver); + +module_param(timeout, uint, 0); +MODULE_PARM_DESC(timeout, "Watchdog heartbeat in seconds"); + +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Carlo Caione <carlo.caione@gmail.com>"); +MODULE_AUTHOR("Henrik Nordstrom <henrik@henriknordstrom.net>"); +MODULE_DESCRIPTION("sunxi WatchDog Timer Driver"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 4da59b4d73f0..42913f131dc2 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -403,21 +403,11 @@ static int ts72xx_wdt_probe(struct platform_device *pdev) } r1 = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r1) { - dev_err(&pdev->dev, "failed to get memory resource\n"); - return -ENODEV; - } - wdt->control_reg = devm_ioremap_resource(&pdev->dev, r1); if (IS_ERR(wdt->control_reg)) return PTR_ERR(wdt->control_reg); r2 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r2) { - dev_err(&pdev->dev, "failed to get memory resource\n"); - return -ENODEV; - } - wdt->feed_reg = devm_ioremap_resource(&pdev->dev, r2); if (IS_ERR(wdt->feed_reg)) return PTR_ERR(wdt->feed_reg); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 3101cf6daf56..a50c6e3a7cc4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -349,8 +349,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages) BUG_ON(page == NULL); pfn = page_to_pfn(page); - BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && - phys_to_machine_mapping_valid(pfn)); set_phys_to_machine(pfn, 
frame_list[i]); @@ -380,6 +378,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) enum bp_state state = BP_DONE; unsigned long pfn, i; struct page *page; + struct page *scratch_page; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, @@ -399,6 +398,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); + scratch_page = get_balloon_scratch_page(); + for (i = 0; i < nr_pages; i++) { page = alloc_page(gfp); if (page == NULL) { @@ -416,7 +417,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) if (xen_pv_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)), + pfn_pte(page_to_pfn(scratch_page), PAGE_KERNEL_RO), 0); BUG_ON(ret); } @@ -432,14 +433,14 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) pfn = mfn_to_pfn(frame_list[i]); if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long p; - struct page *pg; - pg = __get_cpu_var(balloon_scratch_page); - p = page_to_pfn(pg); + p = page_to_pfn(scratch_page); __set_phys_to_machine(pfn, pfn_to_mfn(p)); } balloon_append(pfn_to_page(pfn)); } + put_balloon_scratch_page(); + set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 58e6cbce4156..08f2e1e9a7e6 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -603,10 +603,11 @@ static int v9fs_cache_register(void) if (ret < 0) return ret; #ifdef CONFIG_9P_FSCACHE - return fscache_register_netfs(&v9fs_cache_netfs); -#else - return ret; + ret = fscache_register_netfs(&v9fs_cache_netfs); + if (ret < 0) + v9fs_destroy_inode_cache(); #endif + return ret; } static void v9fs_cache_unregister(void) diff 
--git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 53687bbf2296..a7c481402c46 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -267,14 +267,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT)) + if (!(flags & O_CREAT) || dentry->d_inode) return finish_no_open(file, res); - else if (dentry->d_inode) { - if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - return -EEXIST; - else - return finish_no_open(file, res); - } v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 5f95d1ed9c6d..b9acadafa4a1 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -50,7 +50,7 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); } static int adfs_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/affs/file.c b/fs/affs/file.c index 776e3935a758..8669b6ecddee 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -406,7 +406,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); affs_truncate(inode); } } @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/mmu_context.h> +#include <linux/percpu.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/aio.h> @@ -35,6 +36,10 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> +#include <linux/anon_inodes.h> +#include <linux/migrate.h> +#include <linux/ramfs.h> +#include <linux/percpu-refcount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -61,14 +66,29 @@ struct aio_ring { #define AIO_RING_PAGES 8 +struct kioctx_table { + struct rcu_head rcu; + unsigned nr; + 
struct kioctx *table[]; +}; + +struct kioctx_cpu { + unsigned reqs_available; +}; + struct kioctx { - atomic_t users; + struct percpu_ref users; atomic_t dead; - /* This needs improving */ unsigned long user_id; - struct hlist_node list; + struct __percpu kioctx_cpu *cpu; + + /* + * For percpu reqs_available, number of slots we move to/from global + * counter at a time: + */ + unsigned req_batch; /* * This is what userspace passed to io_setup(), it's not used for * anything but counting against the global max_reqs quota. @@ -88,10 +108,18 @@ struct kioctx { long nr_pages; struct rcu_head rcu_head; - struct work_struct rcu_work; + struct work_struct free_work; struct { - atomic_t reqs_active; + /* + * This counts the number of available slots in the ringbuffer, + * so we avoid overflowing it: it's decremented (if positive) + * when allocating a kiocb and incremented when the resulting + * io_event is pulled off the ringbuffer. + * + * We batch accesses to it with a percpu version. + */ + atomic_t reqs_available; } ____cacheline_aligned_in_smp; struct { @@ -110,6 +138,9 @@ struct kioctx { } ____cacheline_aligned_in_smp; struct page *internal_pages[AIO_RING_PAGES]; + struct file *aio_ring_file; + + unsigned id; }; /*------ sysctl variables----*/ @@ -138,15 +169,77 @@ __initcall(aio_setup); static void aio_free_ring(struct kioctx *ctx) { - long i; + int i; + struct file *aio_ring_file = ctx->aio_ring_file; - for (i = 0; i < ctx->nr_pages; i++) + for (i = 0; i < ctx->nr_pages; i++) { + pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, + page_count(ctx->ring_pages[i])); put_page(ctx->ring_pages[i]); + } if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); + + if (aio_ring_file) { + truncate_setsize(aio_ring_file->f_inode, 0); + fput(aio_ring_file); + ctx->aio_ring_file = NULL; + } +} + +static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &generic_file_vm_ops; + return 0; } +static const 
struct file_operations aio_ring_fops = { + .mmap = aio_ring_mmap, +}; + +static int aio_set_page_dirty(struct page *page) +{ + return 0; +} + +#if IS_ENABLED(CONFIG_MIGRATION) +static int aio_migratepage(struct address_space *mapping, struct page *new, + struct page *old, enum migrate_mode mode) +{ + struct kioctx *ctx = mapping->private_data; + unsigned long flags; + unsigned idx = old->index; + int rc; + + /* Writeback must be complete */ + BUG_ON(PageWriteback(old)); + put_page(old); + + rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); + if (rc != MIGRATEPAGE_SUCCESS) { + get_page(old); + return rc; + } + + get_page(new); + + spin_lock_irqsave(&ctx->completion_lock, flags); + migrate_page_copy(new, old); + ctx->ring_pages[idx] = new; + spin_unlock_irqrestore(&ctx->completion_lock, flags); + + return rc; +} +#endif + +static const struct address_space_operations aio_ctx_aops = { + .set_page_dirty = aio_set_page_dirty, +#if IS_ENABLED(CONFIG_MIGRATION) + .migratepage = aio_migratepage, +#endif +}; + static int aio_setup_ring(struct kioctx *ctx) { struct aio_ring *ring; @@ -154,20 +247,45 @@ static int aio_setup_ring(struct kioctx *ctx) struct mm_struct *mm = current->mm; unsigned long size, populate; int nr_pages; + int i; + struct file *file; /* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ size = sizeof(struct aio_ring); size += sizeof(struct io_event) * nr_events; - nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT; + nr_pages = PFN_UP(size); if (nr_pages < 0) return -EINVAL; - nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); + file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); + if (IS_ERR(file)) { + ctx->aio_ring_file = NULL; + return -EAGAIN; + } + + file->f_inode->i_mapping->a_ops = &aio_ctx_aops; + file->f_inode->i_mapping->private_data = ctx; + file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; + + for (i = 0; i < 
nr_pages; i++) { + struct page *page; + page = find_or_create_page(file->f_inode->i_mapping, + i, GFP_HIGHUSER | __GFP_ZERO); + if (!page) + break; + pr_debug("pid(%d) page[%d]->count=%d\n", + current->pid, i, page_count(page)); + SetPageUptodate(page); + SetPageDirty(page); + unlock_page(page); + } + ctx->aio_ring_file = file; + nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) + / sizeof(struct io_event); - ctx->nr_events = 0; ctx->ring_pages = ctx->internal_pages; if (nr_pages > AIO_RING_PAGES) { ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), @@ -178,10 +296,11 @@ static int aio_setup_ring(struct kioctx *ctx) ctx->mmap_size = nr_pages * PAGE_SIZE; pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size); + down_write(&mm->mmap_sem); - ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate); + ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, 0, &populate); if (IS_ERR((void *)ctx->mmap_base)) { up_write(&mm->mmap_sem); ctx->mmap_size = 0; @@ -190,23 +309,34 @@ static int aio_setup_ring(struct kioctx *ctx) } pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); + + /* We must do this while still holding mmap_sem for write, as we + * need to be protected against userspace attempting to mremap() + * or munmap() the ring buffer. + */ ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, 1, 0, ctx->ring_pages, NULL); + + /* Dropping the reference here is safe as the page cache will hold + * onto the pages for us. It is also required so that page migration + * can unmap the pages and get the right reference count. 
+ */ + for (i = 0; i < ctx->nr_pages; i++) + put_page(ctx->ring_pages[i]); + up_write(&mm->mmap_sem); if (unlikely(ctx->nr_pages != nr_pages)) { aio_free_ring(ctx); return -EAGAIN; } - if (populate) - mm_populate(ctx->mmap_base, populate); ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ ring = kmap_atomic(ctx->ring_pages[0]); ring->nr = nr_events; /* user copy */ - ring->id = ctx->user_id; + ring->id = ~0U; ring->head = ring->tail = 0; ring->magic = AIO_RING_MAGIC; ring->compat_features = AIO_RING_COMPAT_FEATURES; @@ -238,11 +368,9 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb, - struct io_event *res) +static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; - int ret = -EINVAL; /* * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it @@ -252,28 +380,20 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb, cancel = ACCESS_ONCE(kiocb->ki_cancel); do { if (!cancel || cancel == KIOCB_CANCELLED) - return ret; + return -EINVAL; old = cancel; cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); } while (cancel != old); - atomic_inc(&kiocb->ki_users); - spin_unlock_irq(&ctx->ctx_lock); - - memset(res, 0, sizeof(*res)); - res->obj = (u64)(unsigned long)kiocb->ki_obj.user; - res->data = kiocb->ki_user_data; - ret = cancel(kiocb, res); - - spin_lock_irq(&ctx->ctx_lock); - - return ret; + return cancel(kiocb); } static void free_ioctx_rcu(struct rcu_head *head) { struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + + free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } @@ -282,12 +402,13 @@ static void free_ioctx_rcu(struct rcu_head *head) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. 
*/ -static void free_ioctx(struct kioctx *ctx) +static void free_ioctx(struct work_struct *work) { + struct kioctx *ctx = container_of(work, struct kioctx, free_work); struct aio_ring *ring; - struct io_event res; struct kiocb *req; - unsigned head, avail; + unsigned cpu, avail; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->ctx_lock); @@ -296,28 +417,38 @@ static void free_ioctx(struct kioctx *ctx) struct kiocb, ki_list); list_del_init(&req->ki_list); - kiocb_cancel(ctx, req, &res); + kiocb_cancel(ctx, req); } spin_unlock_irq(&ctx->ctx_lock); - ring = kmap_atomic(ctx->ring_pages[0]); - head = ring->head; - kunmap_atomic(ring); + for_each_possible_cpu(cpu) { + struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); - while (atomic_read(&ctx->reqs_active) > 0) { - wait_event(ctx->wait, - head != ctx->tail || - atomic_read(&ctx->reqs_active) <= 0); + atomic_add(kcpu->reqs_available, &ctx->reqs_available); + kcpu->reqs_available = 0; + } - avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head; + while (1) { + prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); - atomic_sub(avail, &ctx->reqs_active); - head += avail; - head %= ctx->nr_events; + ring = kmap_atomic(ctx->ring_pages[0]); + avail = (ring->head <= ring->tail) + ? 
ring->tail - ring->head + : ctx->nr_events - ring->head + ring->tail; + + atomic_add(avail, &ctx->reqs_available); + ring->head = ring->tail; + kunmap_atomic(ring); + + if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) + break; + + schedule(); } + finish_wait(&ctx->wait, &wait); - WARN_ON(atomic_read(&ctx->reqs_active) < 0); + WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); aio_free_ring(ctx); @@ -333,10 +464,68 @@ static void free_ioctx(struct kioctx *ctx) call_rcu(&ctx->rcu_head, free_ioctx_rcu); } -static void put_ioctx(struct kioctx *ctx) +static void free_ioctx_ref(struct percpu_ref *ref) { - if (unlikely(atomic_dec_and_test(&ctx->users))) - free_ioctx(ctx); + struct kioctx *ctx = container_of(ref, struct kioctx, users); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + +static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) +{ + unsigned i, new_nr; + struct kioctx_table *table, *old; + struct aio_ring *ring; + + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); + + while (1) { + if (table) + for (i = 0; i < table->nr; i++) + if (!table->table[i]) { + ctx->id = i; + table->table[i] = ctx; + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); + + ring = kmap_atomic(ctx->ring_pages[0]); + ring->id = ctx->id; + kunmap_atomic(ring); + return 0; + } + + new_nr = (table ? 
table->nr : 1) * 4; + + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); + + table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * + new_nr, GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->nr = new_nr; + + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + old = rcu_dereference(mm->ioctx_table); + + if (!old) { + rcu_assign_pointer(mm->ioctx_table, table); + } else if (table->nr > old->nr) { + memcpy(table->table, old->table, + old->nr * sizeof(struct kioctx *)); + + rcu_assign_pointer(mm->ioctx_table, table); + kfree_rcu(old, rcu); + } else { + kfree(table); + table = old; + } + } } /* ioctx_alloc @@ -348,6 +537,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) struct kioctx *ctx; int err = -ENOMEM; + /* + * We keep track of the number of available ringbuffer slots, to prevent + * overflow (reqs_available), and we also use percpu counters for this. + * + * So since up to half the slots might be on other cpu's percpu counters + * and unavailable, double nr_events so userspace sees what they + * expected: additionally, we move req_batch slots to/from percpu + * counters at a time, so make sure that isn't 0: + */ + nr_events = max(nr_events, num_possible_cpus() * 4); + nr_events *= 2; + /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || (nr_events > (0x10000000U / sizeof(struct kiocb)))) { @@ -355,7 +556,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-EINVAL); } - if (!nr_events || (unsigned long)nr_events > aio_max_nr) + if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL)) return ERR_PTR(-EAGAIN); ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); @@ -364,8 +565,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - atomic_set(&ctx->users, 2); - atomic_set(&ctx->dead, 0); + if (percpu_ref_init(&ctx->users, free_ioctx_ref)) + goto out_freectx; + spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); 
mutex_init(&ctx->ring_lock); @@ -373,12 +575,21 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); + ctx->cpu = alloc_percpu(struct kioctx_cpu); + if (!ctx->cpu) + goto out_freeref; + if (aio_setup_ring(ctx) < 0) - goto out_freectx; + goto out_freepcpu; + + atomic_set(&ctx->reqs_available, ctx->nr_events - 1); + ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); + if (ctx->req_batch < 1) + ctx->req_batch = 1; /* limit the number of system wide aios */ spin_lock(&aio_nr_lock); - if (aio_nr + nr_events > aio_max_nr || + if (aio_nr + nr_events > (aio_max_nr * 2UL) || aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); goto out_cleanup; @@ -386,49 +597,54 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); - /* now link into global list. */ - spin_lock(&mm->ioctx_lock); - hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); - spin_unlock(&mm->ioctx_lock); + percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ + + err = ioctx_add_table(ctx, mm); + if (err) + goto out_cleanup_put; pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; +out_cleanup_put: + percpu_ref_put(&ctx->users); out_cleanup: err = -EAGAIN; aio_free_ring(ctx); +out_freepcpu: + free_percpu(ctx->cpu); +out_freeref: + free_percpu(ctx->users.pcpu_count); out_freectx: + if (ctx->aio_ring_file) + fput(ctx->aio_ring_file); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); } -static void kill_ioctx_work(struct work_struct *work) -{ - struct kioctx *ctx = container_of(work, struct kioctx, rcu_work); - - wake_up_all(&ctx->wait); - put_ioctx(ctx); -} - -static void kill_ioctx_rcu(struct rcu_head *head) -{ - struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); - - INIT_WORK(&ctx->rcu_work, kill_ioctx_work); - schedule_work(&ctx->rcu_work); -} - /* kill_ioctx * Cancels 
all outstanding aio requests on an aio context. Used * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) { if (!atomic_xchg(&ctx->dead, 1)) { - hlist_del_rcu(&ctx->list); + struct kioctx_table *table; + + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); + + WARN_ON(ctx != table->table[ctx->id]); + table->table[ctx->id] = NULL; + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); + + /* percpu_ref_kill() will do the necessary call_rcu() */ + wake_up_all(&ctx->wait); /* * It'd be more correct to do this in free_ioctx(), after all @@ -445,24 +661,23 @@ static void kill_ioctx(struct kioctx *ctx) if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); - /* Between hlist_del_rcu() and dropping the initial ref */ - call_rcu(&ctx->rcu_head, kill_ioctx_rcu); + percpu_ref_kill(&ctx->users); } } /* wait_on_sync_kiocb: * Waits on the given sync kiocb to complete. 
*/ -ssize_t wait_on_sync_kiocb(struct kiocb *iocb) +ssize_t wait_on_sync_kiocb(struct kiocb *req) { - while (atomic_read(&iocb->ki_users)) { + while (!req->ki_ctx) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!atomic_read(&iocb->ki_users)) + if (req->ki_ctx) break; io_schedule(); } __set_current_state(TASK_RUNNING); - return iocb->ki_user_data; + return req->ki_user_data; } EXPORT_SYMBOL(wait_on_sync_kiocb); @@ -476,16 +691,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb); */ void exit_aio(struct mm_struct *mm) { + struct kioctx_table *table; struct kioctx *ctx; - struct hlist_node *n; - - hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) { - if (1 != atomic_read(&ctx->users)) - printk(KERN_DEBUG - "exit_aio:ioctx still alive: %d %d %d\n", - atomic_read(&ctx->users), - atomic_read(&ctx->dead), - atomic_read(&ctx->reqs_active)); + unsigned i = 0; + + while (1) { + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); + + do { + if (!table || i >= table->nr) { + rcu_read_unlock(); + rcu_assign_pointer(mm->ioctx_table, NULL); + if (table) + kfree(table); + return; + } + + ctx = table->table[i++]; + } while (!ctx); + + rcu_read_unlock(); + /* * We don't need to bother with munmap() here - * exit_mmap(mm) is coming and it'll unmap everything. 
@@ -496,40 +723,75 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(ctx); + kill_ioctx(mm, ctx); + } +} + +static void put_reqs_available(struct kioctx *ctx, unsigned nr) +{ + struct kioctx_cpu *kcpu; + + preempt_disable(); + kcpu = this_cpu_ptr(ctx->cpu); + + kcpu->reqs_available += nr; + while (kcpu->reqs_available >= ctx->req_batch * 2) { + kcpu->reqs_available -= ctx->req_batch; + atomic_add(ctx->req_batch, &ctx->reqs_available); + } + + preempt_enable(); +} + +static bool get_reqs_available(struct kioctx *ctx) +{ + struct kioctx_cpu *kcpu; + bool ret = false; + + preempt_disable(); + kcpu = this_cpu_ptr(ctx->cpu); + + if (!kcpu->reqs_available) { + int old, avail = atomic_read(&ctx->reqs_available); + + do { + if (avail < ctx->req_batch) + goto out; + + old = avail; + avail = atomic_cmpxchg(&ctx->reqs_available, + avail, avail - ctx->req_batch); + } while (avail != old); + + kcpu->reqs_available += ctx->req_batch; } + + ret = true; + kcpu->reqs_available--; +out: + preempt_enable(); + return ret; } /* aio_get_req - * Allocate a slot for an aio request. Increments the ki_users count - * of the kioctx so that the kioctx stays around until all requests are - * complete. Returns NULL if no requests are free. - * - * Returns with kiocb->ki_users set to 2. The io submit code path holds - * an extra reference while submitting the i/o. - * This prevents races between the aio code path referencing the - * req (after submitting it) and aio_complete() freeing the req. + * Allocate a slot for an aio request. + * Returns NULL if no requests are free. 
*/ static inline struct kiocb *aio_get_req(struct kioctx *ctx) { struct kiocb *req; - if (atomic_read(&ctx->reqs_active) >= ctx->nr_events) + if (!get_reqs_available(ctx)) return NULL; - if (atomic_inc_return(&ctx->reqs_active) > ctx->nr_events - 1) - goto out_put; - req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); if (unlikely(!req)) goto out_put; - atomic_set(&req->ki_users, 2); req->ki_ctx = ctx; - return req; out_put: - atomic_dec(&ctx->reqs_active); + put_reqs_available(ctx, 1); return NULL; } @@ -539,35 +801,32 @@ static void kiocb_free(struct kiocb *req) fput(req->ki_filp); if (req->ki_eventfd != NULL) eventfd_ctx_put(req->ki_eventfd); - if (req->ki_dtor) - req->ki_dtor(req); - if (req->ki_iovec != &req->ki_inline_vec) - kfree(req->ki_iovec); kmem_cache_free(kiocb_cachep, req); } -void aio_put_req(struct kiocb *req) -{ - if (atomic_dec_and_test(&req->ki_users)) - kiocb_free(req); -} -EXPORT_SYMBOL(aio_put_req); - static struct kioctx *lookup_ioctx(unsigned long ctx_id) { + struct aio_ring __user *ring = (void __user *)ctx_id; struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; + struct kioctx_table *table; + unsigned id; + + if (get_user(id, &ring->id)) + return NULL; rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); - hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { - if (ctx->user_id == ctx_id) { - atomic_inc(&ctx->users); - ret = ctx; - break; - } - } + if (!table || id >= table->nr) + goto out; + ctx = table->table[id]; + if (ctx && ctx->user_id == ctx_id) { + percpu_ref_get(&ctx->users); + ret = ctx; + } +out: rcu_read_unlock(); return ret; } @@ -591,16 +850,16 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * - the sync task helpfully left a reference to itself in the iocb */ if (is_sync_kiocb(iocb)) { - BUG_ON(atomic_read(&iocb->ki_users) != 1); iocb->ki_user_data = res; - atomic_set(&iocb->ki_users, 0); + smp_wmb(); + iocb->ki_ctx = ERR_PTR(-EXDEV); wake_up_process(iocb->ki_obj.tsk); return; 
} /* * Take rcu_read_lock() in case the kioctx is being destroyed, as we - * need to issue a wakeup after decrementing reqs_active. + * need to issue a wakeup after incrementing reqs_available. */ rcu_read_lock(); @@ -613,17 +872,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) } /* - * cancelled requests don't get events, userland was given one - * when the event got cancelled. - */ - if (unlikely(xchg(&iocb->ki_cancel, - KIOCB_CANCELLED) == KIOCB_CANCELLED)) { - atomic_dec(&ctx->reqs_active); - /* Still need the wake_up in case free_ioctx is waiting */ - goto put_rq; - } - - /* * Add a completion event to the ring buffer. Must be done holding * ctx->completion_lock to prevent other code from messing with the tail * pointer since we might be called from irq context. @@ -675,9 +923,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2) if (iocb->ki_eventfd != NULL) eventfd_signal(iocb->ki_eventfd, 1); -put_rq: /* everything turned out well, dispose of the aiocb. */ - aio_put_req(iocb); + kiocb_free(iocb); /* * We have to order our ring_info tail store above and test @@ -702,7 +949,7 @@ static long aio_read_events_ring(struct kioctx *ctx, struct io_event __user *event, long nr) { struct aio_ring *ring; - unsigned head, pos; + unsigned head, tail, pos; long ret = 0; int copy_ret; @@ -710,11 +957,12 @@ static long aio_read_events_ring(struct kioctx *ctx, ring = kmap_atomic(ctx->ring_pages[0]); head = ring->head; + tail = ring->tail; kunmap_atomic(ring); - pr_debug("h%u t%u m%u\n", head, ctx->tail, ctx->nr_events); + pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); - if (head == ctx->tail) + if (head == tail) goto out; while (ret < nr) { @@ -722,8 +970,8 @@ static long aio_read_events_ring(struct kioctx *ctx, struct io_event *ev; struct page *page; - avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head; - if (head == ctx->tail) + avail = (head <= tail ? 
tail : ctx->nr_events) - head; + if (head == tail) break; avail = min(avail, nr - ret); @@ -754,9 +1002,9 @@ static long aio_read_events_ring(struct kioctx *ctx, kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); - pr_debug("%li h%u t%u\n", ret, head, ctx->tail); + pr_debug("%li h%u t%u\n", ret, head, tail); - atomic_sub(ret, &ctx->reqs_active); + put_reqs_available(ctx, ret); out: mutex_unlock(&ctx->ring_lock); @@ -854,8 +1102,8 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(ioctx); - put_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); + percpu_ref_put(&ioctx->users); } out: @@ -872,101 +1120,37 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(ioctx); - put_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); + percpu_ref_put(&ioctx->users); return 0; } pr_debug("EINVAL: io_destroy: invalid context id\n"); return -EINVAL; } -static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret) -{ - struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg]; - - BUG_ON(ret <= 0); - - while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) { - ssize_t this = min((ssize_t)iov->iov_len, ret); - iov->iov_base += this; - iov->iov_len -= this; - iocb->ki_left -= this; - ret -= this; - if (iov->iov_len == 0) { - iocb->ki_cur_seg++; - iov++; - } - } - - /* the caller should not have done more io than what fit in - * the remaining iovecs */ - BUG_ON(ret > 0 && iocb->ki_left == 0); -} - typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); -static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - ssize_t ret = 0; - - /* This matches the pread()/pwrite() logic */ - if (iocb->ki_pos < 0) - return 
-EINVAL; - - if (rw == WRITE) - file_start_write(file); - do { - ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], - iocb->ki_nr_segs - iocb->ki_cur_seg, - iocb->ki_pos); - if (ret > 0) - aio_advance_iovec(iocb, ret); - - /* retry all partial writes. retry partial reads as long as its a - * regular file. */ - } while (ret > 0 && iocb->ki_left > 0 && - (rw == WRITE || - (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); - if (rw == WRITE) - file_end_write(file); - - /* This means we must have transferred all that we could */ - /* No need to retry anymore */ - if ((ret == 0) || (iocb->ki_left == 0)) - ret = iocb->ki_nbytes - iocb->ki_left; - - /* If we managed to write some out we return that, rather than - * the eventual error. */ - if (rw == WRITE - && ret < 0 && ret != -EIOCBQUEUED - && iocb->ki_nbytes - iocb->ki_left) - ret = iocb->ki_nbytes - iocb->ki_left; - - return ret; -} - -static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat) +static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, + int rw, char __user *buf, + unsigned long *nr_segs, + struct iovec **iovec, + bool compat) { ssize_t ret; - kiocb->ki_nr_segs = kiocb->ki_nbytes; + *nr_segs = kiocb->ki_nbytes; #ifdef CONFIG_COMPAT if (compat) ret = compat_rw_copy_check_uvector(rw, - (struct compat_iovec __user *)kiocb->ki_buf, - kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, - &kiocb->ki_iovec); + (struct compat_iovec __user *)buf, + *nr_segs, 1, *iovec, iovec); else #endif ret = rw_copy_check_uvector(rw, - (struct iovec __user *)kiocb->ki_buf, - kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec, - &kiocb->ki_iovec); + (struct iovec __user *)buf, + *nr_segs, 1, *iovec, iovec); if (ret < 0) return ret; @@ -975,15 +1159,17 @@ static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat) return 0; } -static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb) +static ssize_t aio_setup_single_vector(struct kiocb *kiocb, + int rw, char __user *buf, + unsigned 
long *nr_segs, + struct iovec *iovec) { - if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes))) + if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes))) return -EFAULT; - kiocb->ki_iovec = &kiocb->ki_inline_vec; - kiocb->ki_iovec->iov_base = kiocb->ki_buf; - kiocb->ki_iovec->iov_len = kiocb->ki_nbytes; - kiocb->ki_nr_segs = 1; + iovec->iov_base = buf; + iovec->iov_len = kiocb->ki_nbytes; + *nr_segs = 1; return 0; } @@ -992,15 +1178,18 @@ static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb) * Performs the initial checks and aio retry method * setup for the kiocb at the time of io submission. */ -static ssize_t aio_run_iocb(struct kiocb *req, bool compat) +static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, + char __user *buf, bool compat) { struct file *file = req->ki_filp; ssize_t ret; + unsigned long nr_segs; int rw; fmode_t mode; aio_rw_op *rw_op; + struct iovec inline_vec, *iovec = &inline_vec; - switch (req->ki_opcode) { + switch (opcode) { case IOCB_CMD_PREAD: case IOCB_CMD_PREADV: mode = FMODE_READ; @@ -1021,21 +1210,38 @@ rw_common: if (!rw_op) return -EINVAL; - ret = (req->ki_opcode == IOCB_CMD_PREADV || - req->ki_opcode == IOCB_CMD_PWRITEV) - ? aio_setup_vectored_rw(rw, req, compat) - : aio_setup_single_vector(rw, req); + ret = (opcode == IOCB_CMD_PREADV || + opcode == IOCB_CMD_PWRITEV) + ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, + &iovec, compat) + : aio_setup_single_vector(req, rw, buf, &nr_segs, + iovec); if (ret) return ret; ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); - if (ret < 0) + if (ret < 0) { + if (iovec != &inline_vec) + kfree(iovec); return ret; + } req->ki_nbytes = ret; - req->ki_left = ret; - ret = aio_rw_vect_retry(req, rw, rw_op); + /* XXX: move/kill - rw_verify_area()? 
*/ + /* This matches the pread()/pwrite() logic */ + if (req->ki_pos < 0) { + ret = -EINVAL; + break; + } + + if (rw == WRITE) + file_start_write(file); + + ret = rw_op(req, iovec, nr_segs, req->ki_pos); + + if (rw == WRITE) + file_end_write(file); break; case IOCB_CMD_FDSYNC: @@ -1057,6 +1263,9 @@ rw_common: return -EINVAL; } + if (iovec != &inline_vec) + kfree(iovec); + if (ret != -EIOCBQUEUED) { /* * There's no easy way to restart the syscall since other AIO's @@ -1128,21 +1337,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_obj.user = user_iocb; req->ki_user_data = iocb->aio_data; req->ki_pos = iocb->aio_offset; + req->ki_nbytes = iocb->aio_nbytes; - req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; - req->ki_left = req->ki_nbytes = iocb->aio_nbytes; - req->ki_opcode = iocb->aio_lio_opcode; - - ret = aio_run_iocb(req, compat); + ret = aio_run_iocb(req, iocb->aio_lio_opcode, + (char __user *)(unsigned long)iocb->aio_buf, + compat); if (ret) goto out_put_req; - aio_put_req(req); /* drop extra ref to req */ return 0; out_put_req: - atomic_dec(&ctx->reqs_active); - aio_put_req(req); /* drop extra ref to req */ - aio_put_req(req); /* drop i/o ref to req */ + put_reqs_available(ctx, 1); + kiocb_free(req); return ret; } @@ -1195,7 +1401,7 @@ long do_io_submit(aio_context_t ctx_id, long nr, } blk_finish_plug(&plug); - put_ioctx(ctx); + percpu_ref_put(&ctx->users); return i ? 
i : ret; } @@ -1252,7 +1458,6 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) { - struct io_event res; struct kioctx *ctx; struct kiocb *kiocb; u32 key; @@ -1270,21 +1475,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, kiocb = lookup_kiocb(ctx, iocb, key); if (kiocb) - ret = kiocb_cancel(ctx, kiocb, &res); + ret = kiocb_cancel(ctx, kiocb); else ret = -EINVAL; spin_unlock_irq(&ctx->ctx_lock); if (!ret) { - /* Cancellation succeeded -- copy the result - * into the user's buffer. + /* + * The result argument is no longer used - the io_event is + * always delivered via the ring buffer. -EINPROGRESS indicates + * cancellation is progress: */ - if (copy_to_user(result, &res, sizeof(res))) - ret = -EFAULT; + ret = -EINPROGRESS; } - put_ioctx(ctx); + percpu_ref_put(&ctx->users); return ret; } @@ -1313,7 +1519,7 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, if (likely(ioctx)) { if (likely(min_nr <= nr && min_nr >= 0)) ret = read_events(ioctx, min_nr, nr, events, timeout); - put_ioctx(ioctx); + percpu_ref_put(&ioctx->users); } return ret; } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 47a65df8c871..85c961849953 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -109,6 +109,72 @@ static struct file_system_type anon_inode_fs_type = { }; /** + * anon_inode_getfile_private - creates a new file instance by hooking it up to an + * anonymous inode, and a dentry that describe the "class" + * of the file + * + * @name: [in] name of the "class" of the new file + * @fops: [in] file operations for the new file + * @priv: [in] private data for the new file (will be file's private_data) + * @flags: [in] flags + * + * + * Similar to anon_inode_getfile, but each file holds a single inode. 
+ * + */ +struct file *anon_inode_getfile_private(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + struct qstr this; + struct path path; + struct file *file; + struct inode *inode; + + if (fops->owner && !try_module_get(fops->owner)) + return ERR_PTR(-ENOENT); + + inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb); + if (IS_ERR(inode)) { + file = ERR_PTR(-ENOMEM); + goto err_module; + } + + /* + * Link the inode to a directory entry by creating a unique name + * using the inode sequence number. + */ + file = ERR_PTR(-ENOMEM); + this.name = name; + this.len = strlen(name); + this.hash = 0; + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); + if (!path.dentry) + goto err_module; + + path.mnt = mntget(anon_inode_mnt); + + d_instantiate(path.dentry, inode); + + file = alloc_file(&path, OPEN_FMODE(flags), fops); + if (IS_ERR(file)) + goto err_dput; + + file->f_mapping = inode->i_mapping; + file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); + file->private_data = priv; + + return file; + +err_dput: + path_put(&path); +err_module: + module_put(fops->owner); + return file; +} +EXPORT_SYMBOL_GPL(anon_inode_getfile_private); + +/** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 3db70dae40d3..689e40d983ad 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -109,13 +109,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, pkt.hdr.proto_version = sbi->version; pkt.hdr.type = type; - mutex_lock(&sbi->wq_mutex); - /* Check if we have become catatonic */ - if (sbi->catatonic) { - mutex_unlock(&sbi->wq_mutex); - return; - } switch (type) { /* Kernel protocol v4 missing and expire packets */ case autofs_ptype_missing: @@ -427,7 +421,6 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, wq->tgid = current->tgid; wq->status = -EINTR; /* 
Status return if interrupted */ wq->wait_ctr = 2; - mutex_unlock(&sbi->wq_mutex); if (sbi->version < 5) { if (notify == NFY_MOUNT) @@ -449,15 +442,15 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); - /* autofs4_notify_daemon() may block */ + /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */ autofs4_notify_daemon(sbi, wq, type); } else { wq->wait_ctr++; - mutex_unlock(&sbi->wq_mutex); - kfree(qstr.name); DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, notify); + mutex_unlock(&sbi->wq_mutex); + kfree(qstr.name); } /* diff --git a/fs/bfs/file.c b/fs/bfs/file.c index ad3ea1497cc3..ae2892218335 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -166,7 +166,7 @@ static void bfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); } static int bfs_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/block_dev.c b/fs/block_dev.c index 1173a4ee0830..1e86823a9cbd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -592,7 +592,7 @@ static struct block_device *bd_acquire(struct inode *inode) return bdev; } -static inline int sb_is_blkdev_sb(struct super_block *sb) +int sb_is_blkdev_sb(struct super_block *sb) { return sb == blockdev_superblock; } @@ -1542,7 +1542,7 @@ static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, return 0; size -= pos; - if (size < iocb->ki_left) + if (size < iocb->ki_nbytes) nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); return generic_file_aio_read(iocb, iov, nr_segs, pos); } diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 2b3b83296977..398cbd517be2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -72,3 +72,12 @@ config BTRFS_DEBUG 
performance, or export extra information via sysfs. If unsure, say N. + +config BTRFS_ASSERT + bool "Btrfs assert support" + depends on BTRFS_FS + help + Enable run-time assertion checking. This will result in panics if + any of the assertions trip. This is meant for btrfs developers only. + + If unsure, say N. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 3932224f99e9..a91a6a355cc5 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ - reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o + +btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8bc5e8ccb091..0552a599b28f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *btrfs_prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref", + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (btrfs_prelim_ref_cache) + kmem_cache_destroy(btrfs_prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -160,12 +180,12 @@ struct __prelim_ref { static int __add_prelim_ref(struct list_head *head, u64 root_id, struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + u64 
parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) { struct __prelim_ref *ref; - /* in case we're adding delayed refs, we're holding the refs spinlock */ - ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", - (unsigned long long)ref->root_id, level, ref->count, ret, - (unsigned long long)ref->key_for_search.objectid, - ref->key_for_search.type, - (unsigned long long)ref->key_for_search.offset); + ref->root_id, level, ref->count, ret, + ref->key_for_search.objectid, ref->key_for_search.type, + ref->key_for_search.offset); if (ret < 0) goto out; @@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, node = ulist_next(parents, &uiter); ref->parent = node ? node->val : 0; ref->inode_list = node ? 
- (struct extent_inode_elem *)(uintptr_t)node->aux : 0; + (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; /* additional parents require new refs being added here */ while ((node = ulist_next(parents, &uiter))) { - new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, + GFP_NOFS); if (!new_ref) { ret = -ENOMEM; goto out; @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1->count += ref2->count; list_del(&ref2->list); - kfree(ref2); + kmem_cache_free(btrfs_prelim_ref_cache, ref2); } } @@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ref = btrfs_delayed_node_to_tree_ref(node); ret = __add_prelim_ref(prefs, ref->root, &op_key, ref->level + 1, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_BLOCK_REF_KEY: { @@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ret = __add_prelim_ref(prefs, ref->root, NULL, ref->level + 1, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_EXTENT_DATA_REF_KEY: { @@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_DATA_REF_KEY: { @@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref->offset; ret = __add_prelim_ref(prefs, ref->root, &key, 0, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } default: @@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, *info_level + 1, offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case 
BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, offset, NULL, *info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, info_level + 1, key.offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, struct btrfs_shared_data_ref); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, key.offset, NULL, info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -911,7 +931,6 @@ again: while (!list_empty(&prefs)) { ref = 
list_first_entry(&prefs, struct __prelim_ref, list); - list_del(&ref->list); WARN_ON(ref->count < 0); if (ref->count && ref->root_id && ref->parent == 0) { /* no parent == root of tree */ @@ -935,8 +954,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); - ref->inode_list = eie; free_extent_buffer(eb); + if (ret < 0) + goto out; + ref->inode_list = eie; } ret = ulist_add_merge(refs, ref->parent, (uintptr_t)ref->inode_list, @@ -954,7 +975,8 @@ again: eie->next = ref->inode_list; } } - kfree(ref); + list_del(&ref->list); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } out: @@ -962,13 +984,13 @@ out: while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } while (!list_empty(&prefs_delayed)) { ref = list_first_entry(&prefs_delayed, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } return ret; @@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, found_key->type != BTRFS_METADATA_ITEM_KEY) || found_key->objectid > logical || found_key->objectid + size <= logical) { - pr_debug("logical %llu is not within any extent\n", - (unsigned long long)logical); + pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; } @@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, pr_debug("logical %llu is at position %llu within the extent (%llu " "EXTENT_ITEM %llu) flags %#llx size %u\n", - (unsigned long long)logical, - (unsigned long long)(logical - found_key->objectid), - (unsigned long long)found_key->objectid, - (unsigned long long)found_key->offset, - (unsigned long long)flags, item_size); + logical, logical - found_key->objectid, found_key->objectid, + found_key->offset, flags, item_size); WARN_ON(!flags_ret); if (flags_ret) { @@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct 
btrfs_fs_info *fs_info, while (!ret && (root_node = ulist_next(roots, &root_uiter))) { pr_debug("root %llu references leaf %llu, data list " "%#llx\n", root_node->val, ref_node->val, - (long long)ref_node->aux); + ref_node->aux); ret = iterate_leaf_refs((struct extent_inode_elem *) (uintptr_t)ref_node->aux, root_node->val, @@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, name_len = btrfs_inode_ref_name_len(eb, iref); /* path must be released before calling iterate()! */ pr_debug("following ref at offset %u for inode %llu in " - "tree %llu\n", cur, - (unsigned long long)found_key.objectid, - (unsigned long long)fs_root->objectid); + "tree %llu\n", cur, found_key.objectid, + fs_root->objectid); ret = iterate(parent, name_len, (unsigned long)(iref + 1), eb, ctx); if (ret) diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e76702932..a910b27a8ad9 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 08b286b2a2c5..d0ae226926ee 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +struct btrfs_dio_private { + struct inode *inode; + u64 logical_offset; + u64 disk_bytenr; + u64 bytes; + void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + /* orig_bio is our btrfs_io_bio */ + struct bio *orig_bio; + + /* dio_bio came from fs/direct-io.c */ + struct bio *dio_bio; + u8 csum[0]; +}; + /* * Disable DIO read nolock optimization, so new dio readers will be forced * to grab i_mutex. 
It is used to avoid the endless truncate due to diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 1431a6965017..1c47be187240 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr = btrfs_super_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: next_bytenr = btrfs_super_chunk_root(selected_super); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: next_bytenr = btrfs_super_log_root(selected_super); @@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; @@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(root @%llu," " mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); kfree(selected_super); return -1; } @@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, if (ret < (int)PAGE_CACHE_SIZE) { 
printk(KERN_INFO "btrfsic: read @logical %llu failed!\n", - (unsigned long long) tmp_next_block_ctx.start); btrfsic_release_block_ctx(&tmp_next_block_ctx); kfree(selected_super); @@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror( (bh->b_data + (dev_bytenr & 4095)); if (btrfs_super_bytenr(super_tmp) != dev_bytenr || - super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || + btrfs_super_magic(super_tmp) != BTRFS_MAGIC || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || btrfs_super_leafsize(super_tmp) != state->metablock_size || @@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror( printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" " @%llu (%s/%llu/%d)\n", superblock_bdev, - rcu_str_deref(device->name), - (unsigned long long)dev_bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + rcu_str_deref(device->name), dev_bytenr, + dev_state->name, dev_bytenr, superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); @@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror( tmp_disk_key.offset = 0; switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "initial root "; next_bytenr = btrfs_super_root(super_tmp); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "initial chunk "; next_bytenr = btrfs_super_chunk_root(super_tmp); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "initial log "; next_bytenr = btrfs_super_log_root(super_tmp); if (0 == next_bytenr) @@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror( 
next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block *next_block; struct btrfsic_block_data_ctx tmp_next_block_ctx; @@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror( mirror_num)) { printk(KERN_INFO "btrfsic: btrfsic_map_block(" "bytenr @%llu, mirror %d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); brelse(bh); return -1; } @@ -1003,19 +995,17 @@ continue_with_new_stack_frame: (struct btrfs_leaf *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(leafhdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&leafhdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "leaf %llu items %d generation %llu" " owner %llu\n", - (unsigned long long) - sf->block_ctx->start, - sf->nr, - (unsigned long long) - le64_to_cpu(leafhdr->header.generation), - (unsigned long long) - le64_to_cpu(leafhdr->header.owner)); + sf->block_ctx->start, sf->nr, + btrfs_stack_header_generation( + &leafhdr->header), + btrfs_stack_header_owner( + &leafhdr->header)); } continue_with_current_leaf_stack_frame: @@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error: &disk_item, disk_item_offset, sizeof(struct btrfs_item)); - item_offset = le32_to_cpu(disk_item.offset); - item_size = le32_to_cpu(disk_item.size); + item_offset = btrfs_stack_item_offset(&disk_item); + item_size = btrfs_stack_item_offset(&disk_item); disk_key = &disk_item.key; - type = disk_key->type; + type = btrfs_disk_key_type(disk_key); if (BTRFS_ROOT_ITEM_KEY == type) { struct btrfs_root_item root_item; @@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error: sf->block_ctx, &root_item, root_item_offset, item_size); - next_bytenr = le64_to_cpu(root_item.bytenr); + next_bytenr = 
btrfs_root_bytenr(&root_item); sf->error = btrfsic_create_link_to_next_block( @@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error: &sf->num_copies, &sf->mirror_num, disk_key, - le64_to_cpu(root_item. - generation)); + btrfs_root_generation( + &root_item)); if (sf->error) goto one_stack_frame_backwards; @@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error: struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; if (-1 == sf->i) { - sf->nr = le32_to_cpu(nodehdr->header.nritems); + sf->nr = btrfs_stack_header_nritems(&nodehdr->header); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "node %llu level %d items %d" " generation %llu owner %llu\n", - (unsigned long long) sf->block_ctx->start, nodehdr->header.level, sf->nr, - (unsigned long long) - le64_to_cpu(nodehdr->header.generation), - (unsigned long long) - le64_to_cpu(nodehdr->header.owner)); + btrfs_stack_header_generation( + &nodehdr->header), + btrfs_stack_header_owner( + &nodehdr->header)); } continue_with_current_node_stack_frame: @@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame: btrfsic_read_from_block_data( sf->block_ctx, &key_ptr, key_ptr_offset, sizeof(struct btrfs_key_ptr)); - next_bytenr = le64_to_cpu(key_ptr.blockptr); + next_bytenr = btrfs_stack_key_blockptr(&key_ptr); sf->error = btrfsic_create_link_to_next_block( state, @@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame: &sf->num_copies, &sf->mirror_num, &key_ptr.key, - le64_to_cpu(key_ptr.generation)); + btrfs_stack_key_generation(&key_ptr)); if (sf->error) goto one_stack_frame_backwards; @@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data( unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; WARN_ON(offset + len > block_ctx->len); - offset_in_page = (start_offset + offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1); while (len > 0) { cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); @@ -1290,7 +1278,7 
@@ static int btrfsic_create_link_to_next_block( next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, *num_copiesp); + next_bytenr, *num_copiesp); *mirror_nump = 1; } @@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block( if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", - (unsigned long long)next_bytenr, *mirror_nump); + next_bytenr, *mirror_nump); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block( "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c," " bytenr mismatch (!= stored %llu).\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block), - (unsigned long long)next_block->logical_bytenr); + next_block->logical_bytenr); } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Referenced block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)next_bytenr, - next_block_ctx->dev->name, - (unsigned long long)next_block_ctx->dev_bytenr, - *mirror_nump, + next_bytenr, next_block_ctx->dev->name, + next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block)); next_block->logical_bytenr = next_bytenr; @@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block( if (ret < (int)next_block_ctx->len) { printk(KERN_INFO "btrfsic: read block @logical %llu failed!\n", - (unsigned long long)next_bytenr); + next_bytenr); btrfsic_release_block_ctx(next_block_ctx); *next_blockp = NULL; return -1; @@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data( file_extent_item_offset, offsetof(struct btrfs_file_extent_item, 
disk_num_bytes)); if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || - ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { + btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", file_extent_item.type, - (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr)); + btrfs_stack_file_extent_disk_bytenr( + &file_extent_item)); return 0; } @@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data( btrfsic_read_from_block_data(block_ctx, &file_extent_item, file_extent_item_offset, sizeof(struct btrfs_file_extent_item)); - next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + - le64_to_cpu(file_extent_item.offset); - generation = le64_to_cpu(file_extent_item.generation); - num_bytes = le64_to_cpu(file_extent_item.num_bytes); - generation = le64_to_cpu(file_extent_item.generation); + next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) + + btrfs_stack_file_extent_offset(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); + num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item); + generation = btrfs_stack_file_extent_generation(&file_extent_item); if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," " offset = %llu, num_bytes = %llu\n", file_extent_item.type, - (unsigned long long) - le64_to_cpu(file_extent_item.disk_bytenr), - (unsigned long long)le64_to_cpu(file_extent_item.offset), - (unsigned long long)num_bytes); + btrfs_stack_file_extent_disk_bytenr(&file_extent_item), + btrfs_stack_file_extent_offset(&file_extent_item), + num_bytes); while (num_bytes > 0) { u32 chunk_len; int num_copies; @@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data( next_bytenr, state->datablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = 
%d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { struct btrfsic_block_data_ctx next_block_ctx; struct btrfsic_block *next_block; @@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data( if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) printk(KERN_INFO "\tdisk_bytenr = %llu, num_bytes %u\n", - (unsigned long long)next_bytenr, - chunk_len); + next_bytenr, chunk_len); ret = btrfsic_map_block(state, next_bytenr, chunk_len, &next_block_ctx, mirror_num); @@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data( " found in hash table, D," " bytenr mismatch" " (!= stored %llu).\n", - (unsigned long long)next_bytenr, + next_bytenr, next_block_ctx.dev->name, - (unsigned long long) next_block_ctx.dev_bytenr, mirror_num, - (unsigned long long) next_block->logical_bytenr); } next_block->logical_bytenr = next_bytenr; @@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: read_block() with unaligned bytenr %llu\n", - (unsigned long long)block_ctx->dev_bytenr); + block_ctx->dev_bytenr); return -1; } @@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state) printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); list_for_each(elem_ref_to, &b_all->ref_to_list) { const struct btrfsic_block_link *const l = @@ -1787,16 +1764,13 @@ static void 
btrfsic_dump_database(struct btrfsic_state *state) " refers %u* to" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state) " is ref %u* from" " %c @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num, + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); } @@ -1896,8 +1866,8 @@ again: struct list_head *tmp_ref_to; if (block->is_superblock) { - bytenr = le64_to_cpu(((struct btrfs_super_block *) - mapped_datav[0])->bytenr); + bytenr = btrfs_super_bytenr((struct btrfs_super_block *) + mapped_datav[0]); if (num_pages * PAGE_CACHE_SIZE < BTRFS_SUPER_INFO_SIZE) { printk(KERN_INFO @@ -1923,8 +1893,9 @@ again: return; } processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); @@ -1935,12 +1906,9 @@ again: " found in hash table, %c," " bytenr mismatch" " (!= stored %llu).\n", - 
(unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block), - (unsigned long long) block->logical_bytenr); block->logical_bytenr = bytenr; } else if (state->print_mask & @@ -1948,9 +1916,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } else { @@ -1966,9 +1932,7 @@ again: printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, + bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } @@ -1985,21 +1949,14 @@ again: " new(gen=%llu)," " which is referenced by most recent superblock" " (superblockgen=%llu)!\n", - btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) - le64_to_cpu(block->disk_key.objectid), + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, + btrfs_disk_key_objectid(&block->disk_key), block->disk_key.type, - (unsigned long long) - le64_to_cpu(block->disk_key.offset), - (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation), - (unsigned long long) + btrfs_disk_key_offset(&block->disk_key), + btrfs_stack_header_generation( + (struct btrfs_header *) mapped_datav[0]), state->max_superblock_generation); btrfsic_dump_tree(state); } @@ -2008,15 +1965,12 @@ again: printk(KERN_INFO "btrfs: attempt to overwrite %c-block" " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," " which is not yet iodone!\n", - 
btrfsic_get_block_type(state, block), - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - block->mirror_num, - (unsigned long long)block->generation, - (unsigned long long) - le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->generation)); + btrfsic_get_block_type(state, block), bytenr, + dev_state->name, dev_bytenr, block->mirror_num, + block->generation, + btrfs_stack_header_generation( + (struct btrfs_header *) + mapped_datav[0])); /* it would not be safe to go on */ btrfsic_dump_tree(state); goto continue_loop; @@ -2056,7 +2010,7 @@ again: if (ret) { printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" - " failed!\n", (unsigned long long)bytenr); + " failed!\n", bytenr); goto continue_loop; } block_ctx.datav = mapped_datav; @@ -2140,7 +2094,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_process_metablock" "(root @%llu) failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } else { block->is_metadata = 0; block->mirror_num = 0; /* unknown */ @@ -2168,8 +2122,7 @@ again: if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block (%s/%llu/?)" " !found in hash table, D.\n", - dev_state->name, - (unsigned long long)dev_bytenr); + dev_state->name, dev_bytenr); if (!state->include_extent_data) { /* ignore that written D block */ goto continue_loop; @@ -2184,17 +2137,16 @@ again: block_ctx.pagev = NULL; } else { processed_len = state->metablock_size; - bytenr = le64_to_cpu(((struct btrfs_header *) - mapped_datav[0])->bytenr); + bytenr = btrfs_stack_header_bytenr( + (struct btrfs_header *) + mapped_datav[0]); btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block @%llu (%s/%llu/?)" " !found in hash table, M.\n", - (unsigned long long)bytenr, - dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); ret = btrfsic_map_block(state, bytenr, processed_len, 
&block_ctx, 0); @@ -2202,7 +2154,7 @@ again: printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); goto continue_loop; } } @@ -2267,10 +2219,8 @@ again: printk(KERN_INFO "New written %c-block @%llu (%s/%llu/%d)\n", is_metadata ? 'M' : 'D', - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); @@ -2281,7 +2231,7 @@ again: printk(KERN_INFO "btrfsic: process_metablock(root @%llu)" " failed!\n", - (unsigned long long)dev_bytenr); + dev_bytenr); } btrfsic_release_block_ctx(&block_ctx); } @@ -2319,10 +2269,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", bio_error_status, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2332,7 +2280,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) printk(KERN_INFO "bio_end_io() new %s flush_gen=%llu\n", dev_state->name, - (unsigned long long) dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) @@ -2358,10 +2305,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", iodone_w_error, btrfsic_get_block_type(dev_state->state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + 
block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num); block->iodone_w_error = iodone_w_error; if (block->submit_bio_bh_rw & REQ_FLUSH) { @@ -2370,8 +2315,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) printk(KERN_INFO "bh_end_io() new %s flush_gen=%llu\n", - dev_state->name, - (unsigned long long)dev_state->last_flush_gen); + dev_state->name, dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) block->flush_gen = 0; /* FUA completed means block is on disk */ @@ -2396,26 +2340,20 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: superblock @%llu (%s/%llu/%d)" " with old gen %llu <= %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); } else { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) printk(KERN_INFO "btrfsic: got new superblock @%llu (%s/%llu/%d)" " with new gen %llu > %llu\n", - (unsigned long long)superblock->logical_bytenr, + superblock->logical_bytenr, superblock->dev_state->name, - (unsigned long long)superblock->dev_bytenr, - superblock->mirror_num, - (unsigned long long) + superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), - (unsigned long long) state->max_superblock_generation); state->max_superblock_generation = @@ -2432,43 +2370,41 @@ static int btrfsic_process_written_superblock( int num_copies; int mirror_num; const char *additional_string = NULL; - struct btrfs_disk_key tmp_disk_key; + struct btrfs_disk_key tmp_disk_key = {0}; - tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; - tmp_disk_key.offset = 0; + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_ITEM_KEY); + 
btrfs_set_disk_key_objectid(&tmp_disk_key, 0); switch (pass) { case 0: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_ROOT_TREE_OBJECTID); additional_string = "root "; next_bytenr = btrfs_super_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "root@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "root@%llu\n", next_bytenr); break; case 1: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_CHUNK_TREE_OBJECTID); additional_string = "chunk "; next_bytenr = btrfs_super_chunk_root(super_hdr); if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "chunk@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "chunk@%llu\n", next_bytenr); break; case 2: - tmp_disk_key.objectid = - cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); + btrfs_set_disk_key_objectid(&tmp_disk_key, + BTRFS_TREE_LOG_OBJECTID); additional_string = "log "; next_bytenr = btrfs_super_log_root(super_hdr); if (0 == next_bytenr) continue; if (state->print_mask & BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) - printk(KERN_INFO "log@%llu\n", - (unsigned long long)next_bytenr); + printk(KERN_INFO "log@%llu\n", next_bytenr); break; } @@ -2477,7 +2413,7 @@ static int btrfsic_process_written_superblock( next_bytenr, BTRFS_SUPER_INFO_SIZE); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", - (unsigned long long)next_bytenr, num_copies); + next_bytenr, num_copies); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { int was_created; @@ -2493,8 +2429,7 @@ static int btrfsic_process_written_superblock( printk(KERN_INFO "btrfsic: btrfsic_map_block(@%llu," " mirror=%d) failed!\n", - (unsigned long long)next_bytenr, - mirror_num); + next_bytenr, mirror_num); return -1; } @@ -2579,26 +2514,22 @@ 
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " %u* refers to %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); if (l->block_ref_to->never_written) { printk(KERN_INFO "btrfs: attempt to write superblock" " which references block %c @%llu (%s/%llu/%d)" " which is never written!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (!l->block_ref_to->is_iodone) { @@ -2606,10 +2537,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which is not yet iodone!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->block_ref_to->iodone_w_error) { @@ -2617,10 +2547,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " which references block %c @%llu (%s/%llu/%d)" " which has write error!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, 
l->block_ref_to->mirror_num); ret = -1; } else if (l->parent_generation != @@ -2634,13 +2563,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " with generation %llu !=" " parent generation %llu!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num, - (unsigned long long)l->block_ref_to->generation, - (unsigned long long)l->parent_generation); + l->block_ref_to->generation, + l->parent_generation); ret = -1; } else if (l->block_ref_to->flush_gen > l->block_ref_to->dev_state->last_flush_gen) { @@ -2650,13 +2578,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, " (block flush_gen=%llu," " dev->flush_gen=%llu)!\n", btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long) l->block_ref_to->logical_bytenr, l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, - l->block_ref_to->mirror_num, - (unsigned long long)block->flush_gen, - (unsigned long long) + l->block_ref_to->dev_bytenr, + l->block_ref_to->mirror_num, block->flush_gen, l->block_ref_to->dev_state->last_flush_gen); ret = -1; } else if (-1 == btrfsic_check_all_ref_blocks(state, @@ -2701,16 +2626,12 @@ static int btrfsic_is_block_ref_by_superblock( " is ref %u* from %c @%llu (%s/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num, + block->logical_bytenr, block->dev_state->name, + block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long) l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long) l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); if 
(l->block_ref_from->is_superblock && @@ -2737,14 +2658,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2756,14 +2675,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state, " to %c @%llu (%s/%llu/%d).\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), - (unsigned long long)l->block_ref_from->logical_bytenr, + l->block_ref_from->logical_bytenr, l->block_ref_from->dev_state->name, - (unsigned long long)l->block_ref_from->dev_bytenr, - l->block_ref_from->mirror_num, + l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), - (unsigned long long)l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, - (unsigned long long)l->block_ref_to->dev_bytenr, + l->block_ref_to->logical_bytenr, + l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2807,10 +2724,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, */ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - block->dev_state->name, - (unsigned long long)block->dev_bytenr, - block->mirror_num); + block->logical_bytenr, block->dev_state->name, + 
block->dev_bytenr, block->mirror_num); if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { printk("[...]\n"); return; @@ -2943,10 +2858,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( "New %s%c-block @%llu (%s/%llu/%d)\n", additional_string, btrfsic_get_block_type(state, block), - (unsigned long long)block->logical_bytenr, - dev_state->name, - (unsigned long long)block->dev_bytenr, - mirror_num); + block->logical_bytenr, dev_state->name, + block->dev_bytenr, mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); if (NULL != was_created) @@ -2980,7 +2893,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfsic:" " btrfsic_map_block(logical @%llu," " mirror %d) failed!\n", - (unsigned long long)bytenr, mirror_num); + bytenr, mirror_num); continue; } @@ -2997,8 +2910,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," " buffer->log_bytenr=%llu, submit_bio(bdev=%s," " phys_bytenr=%llu)!\n", - (unsigned long long)bytenr, dev_state->name, - (unsigned long long)dev_bytenr); + bytenr, dev_state->name, dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { ret = btrfsic_map_block(state, bytenr, state->metablock_size, @@ -3008,10 +2920,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, printk(KERN_INFO "Read logical bytenr @%llu maps to" " (%s/%llu/%d)\n", - (unsigned long long)bytenr, - block_ctx.dev->name, - (unsigned long long)block_ctx.dev_bytenr, - mirror_num); + bytenr, block_ctx.dev->name, + block_ctx.dev_bytenr, mirror_num); } WARN_ON(1); } @@ -3048,12 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO - 
"submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," - " size=%lu, data=%p, bdev=%p)\n", - rw, (unsigned long)bh->b_blocknr, - (unsigned long long)dev_bytenr, - (unsigned long)bh->b_size, bh->b_data, - bh->b_bdev); + "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu)," + " size=%zu, data=%p, bdev=%p)\n", + rw, (unsigned long long)bh->b_blocknr, + dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, &bh->b_data, 1, NULL, NULL, bh, rw); @@ -3118,9 +3026,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio) BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) printk(KERN_INFO "submit_bio(rw=0x%x, bi_vcnt=%u," - " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, - (unsigned long long)dev_bytenr, + " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", + rw, bio->bi_vcnt, + (unsigned long long)bio->bi_sector, dev_bytenr, bio->bi_bdev); mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, @@ -3213,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root, if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->nodesize, (unsigned long)PAGE_CACHE_SIZE); + root->nodesize, PAGE_CACHE_SIZE); return -1; } if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->leafsize, (unsigned long)PAGE_CACHE_SIZE); + root->leafsize, PAGE_CACHE_SIZE); return -1; } if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { printk(KERN_INFO "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", - root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); + root->sectorsize, PAGE_CACHE_SIZE); return -1; } state = kzalloc(sizeof(*state), GFP_NOFS); @@ -3369,10 +3277,8 @@ void btrfsic_unmount(struct btrfs_root *root, " @%llu (%s/%llu/%d) on umount which is" " not yet iodone!\n", 
btrfsic_get_block_type(state, b_all), - (unsigned long long)b_all->logical_bytenr, - b_all->dev_state->name, - (unsigned long long)b_all->dev_bytenr, - b_all->mirror_num); + b_all->logical_bytenr, b_all->dev_state->name, + b_all->dev_bytenr, b_all->mirror_num); } mutex_unlock(&btrfsic_mutex); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b189bd1e7a3e..6aad98cb343f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode, printk(KERN_INFO "btrfs csum failed ino %llu " "extent %llu csum %u " "wanted %u mirror %d\n", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)disk_start, - csum, *cb_sum, cb->mirror_num); + btrfs_ino(inode), disk_start, csum, *cb_sum, + cb->mirror_num); ret = -EIO; goto fail; } @@ -639,7 +638,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, faili = nr_pages - 1; cb->nr_pages = nr_pages; - add_ra_bio_pages(inode, em_start + em_len, cb); + /* In the parent-locked case, we only locked the range we are + * interested in. In all other cases, we can opportunistically + * cache decompressed data that goes beyond the requested range. 
*/ + if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED)) + add_ra_bio_pages(inode, em_start + em_len, cb); /* include any pages we added in add_ra-bio_pages */ uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ed504607d8ec..64346721173f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm); + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + tree_mod_log_write_unlock(fs_info); + /* + * Ok we no longer care about logging modifications, free up tm + * and return 0. Any callers shouldn't be using tm after + * calling tree_mod_log_insert, but if they do we can just + * change this to return a special error code to let the callers + * do their own thing. 
+ */ + kfree(tm); + return 0; + } - BUG_ON(!tm || !tm->seq); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; @@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { + ret = -EEXIST; kfree(tm); - return -EEXIST; + goto out; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); - return 0; +out: + tree_mod_log_write_unlock(fs_info); + return ret; } /* @@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 1; if (eb && btrfs_header_level(eb) == 0) return 1; - - tree_mod_log_write_lock(fs_info); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list when no blocker exists. - */ - tree_mod_log_write_unlock(fs_info); - return 1; - } - return 0; } -/* - * This allocates memory and gets a tree modification sequence number. - * - * Returns <0 on error. - * Returns >0 (the added sequence number) on success. - */ -static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - - /* - * once we switch from spin locks to something different, we should - * honor the flags parameter here. 
- */ - tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); - if (!tm) - return -ENOMEM; - - spin_lock(&fs_info->tree_mod_seq_lock); - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); - spin_unlock(&fs_info->tree_mod_seq_lock); - - return tm->seq; -} - static inline int __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int slot, enum mod_log_op op, gfp_t flags) { - int ret; struct tree_mod_elem *tm; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - return ret; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; if (op != MOD_LOG_KEY_ADD) { @@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - int ret; - if (tree_mod_dont_log(fs_info, eb)) return 0; - ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); - - tree_mod_log_write_unlock(fs_info); - return ret; -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op) -{ - return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); + return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); } static noinline int @@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, * buffer, i.e. dst_slot < src_slot. 
*/ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); + ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); BUG_ON(ret < 0); } - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static inline void @@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); + ret = __tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); BUG_ON(ret < 0); } } @@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, int log_removal) { struct tree_mod_elem *tm; - int ret; if (tree_mod_dont_log(fs_info, NULL)) return 0; @@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal) __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return 
__tree_mod_log_insert(fs_info, tm); } static struct tree_mod_elem * @@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { - tree_mod_log_write_unlock(fs_info); + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return; - } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, + ret = __tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key_locked(fs_info, dst, + ret = __tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, + GFP_NOFS); BUG_ON(ret < 0); } - - tree_mod_log_write_unlock(fs_info); } static inline void @@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, { int ret; - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? GFP_ATOMIC : GFP_NOFS); + ret = __tree_mod_log_insert_key(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? 
GFP_ATOMIC : GFP_NOFS); BUG_ON(ret < 0); } @@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { if (tree_mod_dont_log(fs_info, eb)) return; - __tree_mod_log_free_eb(fs_info, eb); - - tree_mod_log_write_unlock(fs_info); } static noinline void @@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != btrfs_header_generation(parent)); tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); + MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, int looped = 0; if (!time_seq) - return 0; + return NULL; /* * the very last operation that's logged for a root is the replacement @@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, tm = tree_mod_log_search_oldest(fs_info, root_logical, time_seq); if (!looped && !tm) - return 0; + return NULL; /* * if there are no tree operation for the oldest root, we simply * return it. this should only happen if that (old) root is at @@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). 
*/ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb->start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(logical, root->nodesize); } else { + btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); - btrfs_tree_read_unlock(eb_root); + btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); } @@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, if (trans->transaction != root->fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", - 
(unsigned long long)trans->transid, - (unsigned long long) + trans->transid, root->fs_info->running_transaction->transid); if (trans->transid != root->fs_info->generation) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long)root->fs_info->generation); + trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; @@ -2466,6 +2426,40 @@ done: return ret; } +static void key_search_validate(struct extent_buffer *b, + struct btrfs_key *key, + int level) +{ +#ifdef CONFIG_BTRFS_ASSERT + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, key); + + if (level == 0) + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_leaf, items[0].key), + sizeof(disk_key))); + else + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_node, ptrs[0].key), + sizeof(disk_key))); +#endif +} + +static int key_search(struct extent_buffer *b, struct btrfs_key *key, + int level, int *prev_cmp, int *slot) +{ + if (*prev_cmp != 0) { + *prev_cmp = bin_search(b, key, level, slot); + return *prev_cmp; + } + + key_search_validate(b, key, level); + *slot = 0; + + return 0; +} + /* * look for key in the tree. 
path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root min_write_lock_level = write_lock_level; again: + prev_cmp = -1; /* * we try very hard to do read locks on the root */ @@ -2624,7 +2620,7 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2786,7 +2784,7 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2820,7 +2818,11 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root->fs_info, b, time_seq); + b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } else { @@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - 
(unsigned long)btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, } if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); @@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4040,11 +4039,10 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), + btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); if (split == 0) { @@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, (nritems - slot - 1)); } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } @@ -4814,7 
+4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * This may release the path, and so you may lose any locks held at the * time you call it. */ -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_disk_key found_key; @@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; advance_right = ADVANCE; } else { + enum btrfs_compare_tree_result cmp; + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); ret = tree_compare_item(left_root, left_path, right_path, tmp_buf); - if (ret) { - WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); - ret = changed_cb(left_root, right_root, - left_path, right_path, - &left_key, - BTRFS_COMPARE_TREE_CHANGED, - ctx); - if (ret < 0) - goto out; - } + if (ret) + cmp = BTRFS_COMPARE_TREE_CHANGED; + else + cmp = BTRFS_COMPARE_TREE_SAME; + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, cmp, ctx); + if (ret < 0) + goto out; advance_left = ADVANCE; advance_right = ADVANCE; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e795bf135e80..3c1da6f98a4d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,6 +23,7 @@ #include <linux/highmem.h> #include <linux/fs.h> #include <linux/rwsem.h> +#include <linux/semaphore.h> #include <linux/completion.h> #include <linux/backing-dev.h> #include <linux/wait.h> @@ -91,6 +92,9 @@ struct btrfs_ordered_sum; /* holds quota configuration and tracking */ #define BTRFS_QUOTA_TREE_OBJECTID 8ULL +/* for storing items that use the BTRFS_UUID_KEY* types */ +#define BTRFS_UUID_TREE_OBJECTID 9ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -142,7 +146,7 @@ struct btrfs_ordered_sum; #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 -#define BTRFS_DEV_REPLACE_DEVID 0 +#define BTRFS_DEV_REPLACE_DEVID 0ULL /* * the max metadata 
block size. This limit is somewhat artificial, @@ -478,9 +482,10 @@ struct btrfs_super_block { char label[BTRFS_LABEL_SIZE]; __le64 cache_generation; + __le64 uuid_tree_generation; /* future expansion */ - __le64 reserved[31]; + __le64 reserved[30]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; } __attribute__ ((__packed__)); @@ -1188,6 +1193,7 @@ enum btrfs_caching_type { BTRFS_CACHE_STARTED = 1, BTRFS_CACHE_FAST = 2, BTRFS_CACHE_FINISHED = 3, + BTRFS_CACHE_ERROR = 4, }; enum btrfs_disk_cache_state { @@ -1302,6 +1308,7 @@ struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *csum_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -1350,6 +1357,7 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; unsigned long mount_opt; unsigned long compress_type:4; + int commit_interval; /* * It is a suggestive number, the read side is safe even it gets a * wrong number because we will write out the data into a regular @@ -1411,6 +1419,13 @@ struct btrfs_fs_info { * before jumping into the main commit. */ struct mutex ordered_operations_mutex; + + /* + * Same as ordered_operations_mutex except this is for ordered extents + * and not the operations. + */ + struct mutex ordered_extent_flush_mutex; + struct rw_semaphore extent_commit_sem; struct rw_semaphore cleanup_work_sem; @@ -1641,6 +1656,9 @@ struct btrfs_fs_info { struct btrfs_dev_replace dev_replace; atomic_t mutually_exclusive_operation_running; + + struct semaphore uuid_tree_rescan_sem; + unsigned int update_uuid_tree_gen:1; }; /* @@ -1934,6 +1952,19 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_DEV_REPLACE_KEY 250 /* + * Stores items that allow to quickly map UUIDs to something else. + * These items are part of the filesystem UUID tree. 
+ * The key is built like this: + * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). + */ +#if BTRFS_UUID_SIZE != 16 +#error "UUID items require BTRFS_UUID_SIZE == 16!" +#endif +#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ +#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to + * received subvols */ + +/* * string items are for debugging. They just store a short string of * data in the FS */ @@ -1967,6 +1998,9 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) +#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) + +#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -2130,14 +2164,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, generation, 64); -static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, uuid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); } -static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) +static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) { - return (char *)d + offsetof(struct btrfs_dev_item, fsid); + return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); } BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); @@ -2240,6 +2274,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); 
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, + sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, + transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, + nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, + block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) @@ -2267,6 +2318,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item) BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); /* struct btrfs_dev_extent */ BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, @@ -2277,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); -static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) +static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) { unsigned long ptr = offsetof(struct btrfs_dev_extent, 
chunk_tree_uuid); - return (u8 *)((unsigned long)dev + ptr); + return (unsigned long)dev + ptr; } BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); @@ -2348,6 +2401,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, + blockptr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, + generation, 64); static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { @@ -2404,6 +2461,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb, /* struct btrfs_item */ BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); static inline unsigned long btrfs_item_nr_offset(int nr) { @@ -2466,6 +2525,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, + data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, + name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, + transid, 64); static inline void btrfs_dir_item_key(struct extent_buffer *eb, struct btrfs_dir_item *item, @@ -2568,6 +2634,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct 
btrfs_header, nritems, 32); BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, + nritems, 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) { @@ -2603,16 +2675,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, fsid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, fsid); } -static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) +static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) { - unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); - return (u8 *)ptr; + return offsetof(struct btrfs_header, chunk_tree_uuid); } static inline int btrfs_is_leaf(struct extent_buffer *eb) @@ -2830,6 +2900,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, csum_type, 16); BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); static inline int btrfs_super_csum_size(struct btrfs_super_block *s) { @@ -2847,6 +2920,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) /* struct btrfs_file_extent_item */ BTRFS_SETGET_FUNCS(file_extent_type, struct 
btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, + struct btrfs_file_extent_item, disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, + struct btrfs_file_extent_item, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, + struct btrfs_file_extent_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, + struct btrfs_file_extent_item, num_bytes, 64); static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) @@ -3107,11 +3188,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, int is_data); +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, + u64 min_alloc_size, u64 empty_size, u64 hint_byte, + struct btrfs_key *ins, int is_data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref, int for_cow); int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3175,7 +3254,7 @@ void btrfs_orphan_release_metadata(struct inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, - u64 *qgroup_reserved); + u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, u64 qgroup_reserved); @@ -3245,6 +3324,7 @@ enum btrfs_compare_tree_result { BTRFS_COMPARE_TREE_NEW, BTRFS_COMPARE_TREE_DELETED, BTRFS_COMPARE_TREE_CHANGED, + BTRFS_COMPARE_TREE_SAME, }; typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, struct btrfs_root *right_root, @@ -3380,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info 
*fs_info) kfree(fs_info->dev_root); kfree(fs_info->csum_root); kfree(fs_info->quota_root); + kfree(fs_info->uuid_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kfree(fs_info); @@ -3414,8 +3495,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item); -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item); int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, struct btrfs_path *path, struct btrfs_root_item *root_item, struct btrfs_key *root_key); @@ -3426,6 +3505,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item); void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root); +/* uuid-tree.c */ +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid); +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)); + /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, const char *name, int name_len); @@ -3509,12 +3599,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, struct btrfs_inode_extref **extref_ret); /* file-item.c */ +struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u32 *dst); int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 logical_offset); + struct btrfs_dio_private *dip, struct bio *bio, + u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, @@ -3552,8 
+3644,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); @@ -3643,11 +3734,15 @@ extern const struct dentry_operations btrfs_dentry_operations; long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); void btrfs_update_iflags(struct inode *inode); void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); +int btrfs_is_empty_uuid(u8 *uuid); int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_ioctl_defrag_range_args *range, u64 newer_than, unsigned long max_pages); void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, + struct btrfs_ioctl_balance_args *bargs); + /* file.c */ int btrfs_auto_defrag_init(void); @@ -3720,6 +3815,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #define btrfs_debug(fs_info, fmt, args...) \ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) +#ifdef CONFIG_BTRFS_ASSERT + +static inline void assfail(char *expr, char *file, int line) +{ + printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +#define ASSERT(expr) \ + (likely(expr) ? 
(void)0 : assfail(#expr, __FILE__, __LINE__)) +#else +#define ASSERT(expr) ((void)0) +#endif + +#define btrfs_assert() __printf(5, 6) void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 375510913fe7..cbd9523ad09c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -21,6 +21,7 @@ #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" +#include "ctree.h" #define BTRFS_DELAYED_WRITEBACK 512 #define BTRFS_DELAYED_BACKGROUND 128 @@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, dir_item = (struct btrfs_dir_item *)delayed_item->data; dir_item->location = *disk_key; - dir_item->transid = cpu_to_le64(trans->transid); - dir_item->data_len = 0; - dir_item->name_len = cpu_to_le16(name_len); - dir_item->type = type; + btrfs_set_stack_dir_transid(dir_item, trans->transid); + btrfs_set_stack_dir_data_len(dir_item, 0); + btrfs_set_stack_dir_name_len(dir_item, name_len); + btrfs_set_stack_dir_type(dir_item, type); memcpy((char *)(dir_item + 1), name, name_len); ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); @@ -1470,13 +1471,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, mutex_lock(&delayed_node->mutex); ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); if (unlikely(ret)) { - printk(KERN_ERR "err add delayed dir index item(name: %s) into " - "the insertion tree of the delayed node" + printk(KERN_ERR "err add delayed dir index item(name: %.*s) " + "into the insertion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - name, - (unsigned long long)delayed_node->root->objectid, - (unsigned long long)delayed_node->inode_id, - ret); + name_len, name, delayed_node->root->objectid, + delayed_node->inode_id, ret); BUG(); } mutex_unlock(&delayed_node->mutex); @@ -1547,9 
+1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, printk(KERN_ERR "err add delayed dir index item(index: %llu) " "into the deletion tree of the delayed node" "(root id: %llu, inode id: %llu, errno: %d)\n", - (unsigned long long)index, - (unsigned long long)node->root->objectid, - (unsigned long long)node->inode_id, + index, node->root->objectid, node->inode_id, ret); BUG(); } @@ -1699,7 +1696,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, di = (struct btrfs_dir_item *)curr->data; name = (char *)(di + 1); - name_len = le16_to_cpu(di->name_len); + name_len = btrfs_stack_dir_name_len(di); d_type = btrfs_filetype_table[di->type]; btrfs_disk_key_to_cpu(&location, &di->location); @@ -1716,27 +1713,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, return 0; } -BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, - sequence, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, - transid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, - nbytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, - block_group, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 
32); - static void fill_stack_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *inode_item, struct inode *inode) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index c219463fb1fd..e4d467be2dd4 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, return 0; } -static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, +static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_node *ref) { @@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&head_ref->cluster); mutex_init(&head_ref->mutex); - trace_btrfs_delayed_ref_head(ref, head_ref, action); + trace_add_delayed_ref_head(ref, head_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->type = BTRFS_TREE_BLOCK_REF_KEY; full_ref->level = level; - trace_btrfs_delayed_tree_ref(ref, full_ref, action); + trace_add_delayed_tree_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); @@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, full_ref->objectid = owner; full_ref->offset = offset; - trace_btrfs_delayed_data_ref(ref, full_ref, action); + trace_add_delayed_data_ref(ref, full_ref, action); existing = tree_insert(&delayed_refs->root, &ref->rb_node); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5f8f3341c099..a64435359385 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -148,13 +148,13 @@ no_valid_dev_replace_entry_found: !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", - 
(unsigned long long)src_devid); + src_devid); } if (!dev_replace->tgtdev && !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", - (unsigned long long)BTRFS_DEV_REPLACE_DEVID); + BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { if (dev_replace->srcdev) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b092a1c4e37..4cbb00af92ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,6 +31,7 @@ #include <linux/migrate.h> #include <linux/ratelimit.h> #include <linux/uuid.h> +#include <linux/semaphore.h> #include <asm/unaligned.h> #include "compat.h" #include "ctree.h" @@ -302,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " "failed on %llu wanted %X found %X " "level %d\n", - root->fs_info->sb->s_id, - (unsigned long long)buf->start, val, found, - btrfs_header_level(buf)); + root->fs_info->sb->s_id, buf->start, + val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); return 1; @@ -345,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, } printk_ratelimited("parent transid verify failed on %llu wanted %llu " "found %llu\n", - (unsigned long long)eb->start, - (unsigned long long)parent_transid, - (unsigned long long)btrfs_header_generation(eb)); + eb->start, parent_transid, btrfs_header_generation(eb)); ret = 1; clear_extent_buffer_uptodate(eb); out: @@ -497,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), - BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -512,8 +509,7 @@ static int 
check_tree_block_fsid(struct btrfs_root *root, #define CORRUPT(reason, eb, root, slot) \ printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ "root=%llu, slot=%d\n", reason, \ - (unsigned long long)btrfs_header_bytenr(eb), \ - (unsigned long long)root->objectid, slot) + btrfs_header_bytenr(eb), root->objectid, slot) static noinline int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) @@ -576,8 +572,9 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { struct extent_io_tree *tree; u64 found_start; @@ -612,14 +609,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (found_start != eb->start) { printk_ratelimited(KERN_INFO "btrfs bad tree block start " "%llu %llu\n", - (unsigned long long)found_start, - (unsigned long long)eb->start); + found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", - (unsigned long long)eb->start); + eb->start); ret = -EIO; goto err; } @@ -1148,6 +1144,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); + if (ret) { + free_extent_buffer(buf); + return NULL; + } return buf; } @@ -1291,11 +1291,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + 
btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1379,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(root->node), - BTRFS_FSID_SIZE); + btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1413,11 +1411,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, log_root->root_key.offset = root->root_key.objectid; inode_item = &log_root->root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1428,8 +1426,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, return 0; } -struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, - struct btrfs_key *key) +static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_fs_info *fs_info = tree_root->fs_info; @@ -1529,8 +1527,8 @@ fail: return ret; } -struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; @@ -1581,10 +1579,16 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) return fs_info->quota_root ? 
fs_info->quota_root : ERR_PTR(-ENOENT); + if (location->objectid == BTRFS_UUID_TREE_OBJECTID) + return fs_info->uuid_root ? fs_info->uuid_root : + ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); - if (root) + if (root) { + if (btrfs_root_refs(&root->root_item) == 0) + return ERR_PTR(-ENOENT); return root; + } root = btrfs_read_fs_root(fs_info->tree_root, location); if (IS_ERR(root)) @@ -1737,7 +1741,7 @@ static int transaction_kthread(void *arg) do { cannot_commit = false; - delay = HZ * 30; + delay = HZ * root->fs_info->commit_interval; mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); @@ -1749,7 +1753,8 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && - (now < cur->start_time || now - cur->start_time < 30)) { + (now < cur->start_time || + now - cur->start_time < root->fs_info->commit_interval)) { spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; goto sleep; @@ -2038,6 +2043,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) info->quota_root->node = NULL; info->quota_root->commit_root = NULL; } + if (info->uuid_root) { + free_extent_buffer(info->uuid_root->node); + free_extent_buffer(info->uuid_root->commit_root); + info->uuid_root->node = NULL; + info->uuid_root->commit_root = NULL; + } if (chunk_root) { free_extent_buffer(info->chunk_root->node); free_extent_buffer(info->chunk_root->commit_root); @@ -2098,11 +2109,14 @@ int open_ctree(struct super_block *sb, struct btrfs_root *chunk_root; struct btrfs_root *dev_root; struct btrfs_root *quota_root; + struct btrfs_root *uuid_root; struct btrfs_root *log_tree_root; int ret; int err = -EINVAL; int num_backups_tried = 0; int backup_index = 0; + bool create_uuid_tree; + bool check_uuid_tree; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); @@ -2189,6 +2203,7 @@ int 
open_ctree(struct super_block *sb, fs_info->defrag_inodes = RB_ROOT; fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; + fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); @@ -2270,6 +2285,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->ordered_operations_mutex); + mutex_init(&fs_info->ordered_extent_flush_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); @@ -2278,6 +2294,7 @@ int open_ctree(struct super_block *sb, init_rwsem(&fs_info->extent_commit_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); + sema_init(&fs_info->uuid_tree_rescan_sem, 1); fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); @@ -2383,7 +2400,7 @@ int open_ctree(struct super_block *sb, if (features) { printk(KERN_ERR "BTRFS: couldn't mount because of " "unsupported optional features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -2453,7 +2470,7 @@ int open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY) && features) { printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " "unsupported option features (%Lx).\n", - (unsigned long long)features); + features); err = -EINVAL; goto fail_alloc; } @@ -2466,20 +2483,17 @@ int open_ctree(struct super_block *sb, &fs_info->generic_worker); btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", - fs_info->thread_pool_size, - &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, - fs_info->thread_pool_size), - 
&fs_info->generic_worker); + fs_info->thread_pool_size), NULL); btrfs_init_workers(&fs_info->caching_workers, "cache", - 2, &fs_info->generic_worker); + fs_info->thread_pool_size, NULL); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the @@ -2575,7 +2589,7 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); - if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { + if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -2615,8 +2629,7 @@ int open_ctree(struct super_block *sb, chunk_root->commit_root = btrfs_root_node(chunk_root); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); ret = btrfs_read_chunk_tree(chunk_root); if (ret) { @@ -2696,6 +2709,22 @@ retry_root_backup: fs_info->quota_root = quota_root; } + location.objectid = BTRFS_UUID_TREE_OBJECTID; + uuid_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(uuid_root)) { + ret = PTR_ERR(uuid_root); + if (ret != -ENOENT) + goto recovery_tree_root; + create_uuid_tree = true; + check_uuid_tree = false; + } else { + uuid_root->track_dirty = 1; + fs_info->uuid_root = uuid_root; + create_uuid_tree = false; + check_uuid_tree = + generation != btrfs_super_uuid_tree_generation(disk_super); + } + fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -2882,6 +2911,29 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); + if (create_uuid_tree) { + pr_info("btrfs: creating UUID tree\n"); + ret = btrfs_create_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to create the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } else if (check_uuid_tree || + 
btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { + pr_info("btrfs: checking UUID tree\n"); + ret = btrfs_check_uuid_tree(fs_info); + if (ret) { + pr_warn("btrfs: failed to check the UUID tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } else { + fs_info->update_uuid_tree_gen = 1; + } + return 0; fail_qgroup: @@ -2983,15 +3035,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) */ for (i = 0; i < 1; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + i_size_read(bdev->bd_inode)) break; - bh = __bread(bdev, bytenr / 4096, 4096); + bh = __bread(bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); if (!bh) continue; super = (struct btrfs_super_block *)bh->b_data; if (btrfs_super_bytenr(super) != bytenr || - super->magic != cpu_to_le64(BTRFS_MAGIC)) { + btrfs_super_magic(super) != BTRFS_MAGIC) { brelse(bh); continue; } @@ -3311,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); backup_super_roots(root->fs_info); @@ -3320,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); head = &root->fs_info->fs_devices->devices; + max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(root->fs_info); @@ -3362,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) printk(KERN_ERR "btrfs: %d errors while writing supers\n", total_errors); - /* This shouldn't happen. 
FUA is masked off if unsupported */ - BUG(); + /* FUA is masked off if unsupported and can't be the reason */ + btrfs_error(root->fs_info, -EIO, + "%d errors while writing supers", total_errors); + return -EIO; } total_errors = 0; @@ -3421,6 +3477,8 @@ static void free_fs_root(struct btrfs_root *root) { iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; if (root->anon_dev) free_anon_bdev(root->anon_dev); free_extent_buffer(root->node); @@ -3510,6 +3568,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); + /* wait for the uuid_scan task to finish */ + down(&fs_info->uuid_tree_rescan_sem); + /* avoid complains from lockdep et al., set sem back to initial state */ + up(&fs_info->uuid_tree_rescan_sem); + /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); @@ -3573,6 +3636,9 @@ int close_ctree(struct btrfs_root *root) btrfs_free_stripe_hash_table(fs_info); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; + return 0; } @@ -3608,9 +3674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", - (unsigned long long)buf->start, - (unsigned long long)transid, - (unsigned long long)root->fs_info->generation); + buf->start, transid, root->fs_info->generation); was_dirty = set_extent_buffer_dirty(buf); if (!was_dirty) __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, @@ -3744,8 +3808,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->ordered_root_lock); } -int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_root *root) +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) { struct rb_node *node; struct 
btrfs_delayed_ref_root *delayed_refs; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1204c8ef6f32..cfb3cf711b34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -113,7 +113,8 @@ static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) { smp_mb(); - return cache->cached == BTRFS_CACHE_FINISHED; + return cache->cached == BTRFS_CACHE_FINISHED || + cache->cached == BTRFS_CACHE_ERROR; } static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) @@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = 0; + int ret = -ENOMEM; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; @@ -420,6 +421,7 @@ again: /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); +next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -459,6 +461,16 @@ again: continue; } + if (key.objectid < last) { + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + caching_ctl->progress = last; + btrfs_release_path(path); + goto next; + } + if (key.objectid < block_group->key.objectid) { path->slots[0]++; continue; @@ -506,6 +518,12 @@ err: mutex_unlock(&caching_ctl->mutex); out: + if (ret) { + spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; + block_group->cached = BTRFS_CACHE_ERROR; + spin_unlock(&block_group->lock); + } wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); @@ -771,10 +789,23 @@ again: goto out_free; if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; - btrfs_release_path(path); - goto again; + metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type 
== BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + if (ret) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } } if (ret == 0) { @@ -2011,6 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); + trace_run_delayed_data_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; else @@ -2154,6 +2187,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); + trace_run_delayed_tree_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; else @@ -2212,6 +2247,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); + trace_run_delayed_ref_head(node, head, node->action); + if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, node->num_bytes, 1); @@ -2403,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2425,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. 
*/ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } @@ -3799,8 +3837,12 @@ again: if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { + if (should_alloc_chunk(extent_root, space_info, force)) + ret = -ENOSPC; + else + ret = 0; spin_unlock(&space_info->lock); - return 0; + return ret; } if (!should_alloc_chunk(extent_root, space_info, force)) { @@ -4320,6 +4362,9 @@ static struct btrfs_block_rsv *get_block_rsv( if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; + if (root == root->fs_info->uuid_root) + block_rsv = trans->block_rsv; + if (!block_rsv) block_rsv = root->block_rsv; @@ -4729,10 +4774,12 @@ void btrfs_orphan_release_metadata(struct inode *inode) int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int items, - u64 *qgroup_reserved) + u64 *qgroup_reserved, + bool use_global_rsv) { u64 num_bytes; int ret; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ @@ -4751,6 +4798,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_BLOCK_GROUP_METADATA); ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + if (ret) { if (*qgroup_reserved) btrfs_qgroup_free(root, *qgroup_reserved); @@ -5668,7 +5719,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); @@ -5684,11 +5735,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu 
root %llu owner %llu offset %llu", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root_objectid, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); + bytenr, parent, root_objectid, owner_objectid, + owner_offset); } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5717,7 +5765,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -1, 1); if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { @@ -5999,8 +6047,11 @@ static u64 stripe_align(struct btrfs_root *root, * for our min num_bytes. Another option is to have it go ahead * and look in the rbtree for a free extent of a given size, but this * is a good start. + * + * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using + * any of the information in this block group. */ -static noinline int +static noinline void wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { @@ -6008,28 +6059,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return; wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); - return 0; } static noinline int wait_block_group_cache_done(struct btrfs_block_group_cache *cache) { struct btrfs_caching_control *caching_ctl; + int ret = 0; caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return (cache->cached == BTRFS_CACHE_ERROR) ? 
-EIO : 0; wait_event(caching_ctl->wait, block_group_cache_done(cache)); - + if (cache->cached == BTRFS_CACHE_ERROR) + ret = -EIO; put_caching_control(caching_ctl); - return 0; + return ret; } int __get_raid_index(u64 flags) @@ -6070,8 +6122,7 @@ enum btrfs_loop_type { * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static noinline int find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags) @@ -6212,6 +6263,8 @@ have_block_group: ret = 0; } + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + goto loop; if (unlikely(block_group->ro)) goto loop; @@ -6292,10 +6345,10 @@ refill_cluster: block_group->full_stripe_len); /* allocate a cluster in this block group */ - ret = btrfs_find_space_cluster(trans, root, - block_group, last_ptr, - search_start, num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(root, block_group, + last_ptr, search_start, + num_bytes, + aligned_cluster); if (ret == 0) { /* * now pull our allocation out of this @@ -6426,17 +6479,28 @@ loop: index = 0; loop++; if (loop == LOOP_ALLOC_CHUNK) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = do_chunk_alloc(trans, root, flags, CHUNK_ALLOC_FORCE); /* * Do not bail out on ENOSPC since we * can do more things. 
*/ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0 && ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); + else + ret = 0; + btrfs_end_transaction(trans, root); + if (ret) goto out; - } } if (loop == LOOP_NO_EMPTY_SIZE) { @@ -6463,19 +6527,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, spin_lock(&info->lock); printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", - (unsigned long long)info->flags, - (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved - - info->bytes_readonly), + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly, (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " "reserved=%llu, may_use=%llu, readonly=%llu\n", - (unsigned long long)info->total_bytes, - (unsigned long long)info->bytes_used, - (unsigned long long)info->bytes_pinned, - (unsigned long long)info->bytes_reserved, - (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_readonly); + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -6486,12 +6546,9 @@ again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", - (unsigned long long)cache->key.objectid, - (unsigned long long)cache->key.offset, - (unsigned long long)btrfs_block_group_used(&cache->item), - (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? "[readonly]" : ""); + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? 
"[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -6500,8 +6557,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data) @@ -6513,8 +6569,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, flags); + ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, + flags); if (ret == -ENOSPC) { if (!final_tried) { @@ -6529,8 +6585,7 @@ again: sinfo = __find_space_info(root->fs_info, flags); btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - (unsigned long long)flags, - (unsigned long long)num_bytes); + flags, num_bytes); if (sinfo) dump_space_info(sinfo, num_bytes, 1); } @@ -6550,7 +6605,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { btrfs_err(root->fs_info, "Unable to find block group for %llu", - (unsigned long long)start); + start); return -ENOSPC; } @@ -6646,8 +6701,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6719,8 +6773,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long 
long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6902,7 +6955,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); @@ -7173,6 +7226,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) return -ENOMEM; + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, + level - 1); reada = 1; } btrfs_tree_lock(next); @@ -7658,7 +7713,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. */ - if (root_dropped == false) + if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); @@ -8192,7 +8247,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ - if (block_group->cached == BTRFS_CACHE_NO) + if (block_group->cached == BTRFS_CACHE_NO || + block_group->cached == BTRFS_CACHE_ERROR) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); @@ -8409,9 +8465,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) * avoid allocating from un-mirrored block group if there are * mirrored block groups. 
*/ - list_for_each_entry(cache, &space_info->block_groups[3], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_RAID0], + list) set_block_group_ro(cache, 1); - list_for_each_entry(cache, &space_info->block_groups[4], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_SINGLE], + list) set_block_group_ro(cache, 1); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe443fece851..09582b81640c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void) state = list_entry(states.next, struct extent_state, leak_list); printk(KERN_ERR "btrfs state leak: start %llu end %llu " "state %lu in tree %p refs %d\n", - (unsigned long long)state->start, - (unsigned long long)state->end, - state->state, state->tree, atomic_read(&state->refs)); + state->start, state->end, state->state, state->tree, + atomic_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } @@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void) while (!list_empty(&buffers)) { eb = list_entry(buffers.next, struct extent_buffer, leak_list); printk(KERN_ERR "btrfs buffer leak start %llu len %lu " - "refs %d\n", (unsigned long long)eb->start, - eb->len, atomic_read(&eb->refs)); + "refs %d\n", + eb->start, eb->len, atomic_read(&eb->refs)); list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } @@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { printk_ratelimited(KERN_DEBUG "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", - caller, - (unsigned long long)btrfs_ino(inode), - (unsigned long long)isize, - (unsigned long long)start, - (unsigned long long)end); + caller, btrfs_ino(inode), isize, start, end); } } #else @@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree, if (end < start) WARN(1, KERN_ERR "btrfs end < start %llu 
%llu\n", - (unsigned long long)end, - (unsigned long long)start); + end, start); state->start = start; state->end = end; @@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree, struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); printk(KERN_ERR "btrfs found node %llu %llu on insert of " - "%llu %llu\n", (unsigned long long)found->start, - (unsigned long long)found->end, - (unsigned long long)start, (unsigned long long)end); + "%llu %llu\n", + found->start, found->end, start, end); return -EEXIST; } state->tree = tree; @@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state, } } -static void uncache_state(struct extent_state **cached_ptr) -{ - if (cached_ptr && (*cached_ptr)) { - struct extent_state *state = *cached_ptr; - *cached_ptr = NULL; - free_extent_state(state); - } -} - /* * set some bits on a range in the tree. This may require allocations or * sleeping, so the gfp mask is used to indicate what is allowed. @@ -1687,31 +1671,21 @@ out_failed: return found; } -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op) +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long clear_bits, + unsigned long page_ops) { + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; int ret; struct page *pages[16]; unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; unsigned long nr_pages = end_index - index + 1; int i; - unsigned long clear_bits = 0; - - if (op & EXTENT_CLEAR_UNLOCK) - clear_bits |= EXTENT_LOCKED; - if (op & EXTENT_CLEAR_DIRTY) - clear_bits |= EXTENT_DIRTY; - - if (op & EXTENT_CLEAR_DELALLOC) - clear_bits |= EXTENT_DELALLOC; clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | 
EXTENT_END_WRITEBACK | - EXTENT_SET_PRIVATE2))) + if (page_ops == 0) return 0; while (nr_pages > 0) { @@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { - if (op & EXTENT_SET_PRIVATE2) + if (page_ops & PAGE_SET_PRIVATE2) SetPagePrivate2(pages[i]); if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; } - if (op & EXTENT_CLEAR_DIRTY) + if (page_ops & PAGE_CLEAR_DIRTY) clear_page_dirty_for_io(pages[i]); - if (op & EXTENT_SET_WRITEBACK) + if (page_ops & PAGE_SET_WRITEBACK) set_page_writeback(pages[i]); - if (op & EXTENT_END_WRITEBACK) + if (page_ops & PAGE_END_WRITEBACK) end_page_writeback(pages[i]); - if (op & EXTENT_CLEAR_UNLOCK_PAGE) + if (page_ops & PAGE_UNLOCK) unlock_page(pages[i]); page_cache_release(pages[i]); } @@ -1810,7 +1784,7 @@ out: * set the private field for a given byte offset in the tree. If there isn't * an extent_state there already, this does nothing. */ -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; struct extent_state *state; @@ -1837,64 +1811,6 @@ out: return ret; } -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count) -{ - struct rb_node *node; - struct extent_state *state; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - - while (count) { - state->private = *csums++; - count--; - state = next_state(state); - } - spin_unlock(&tree->lock); -} - -static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index) -{ - struct bio_vec *bvec = bio->bi_io_vec + bio_index; - - return page_offset(bvec->bv_page) + bvec->bv_offset; -} - -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index, - u32 csums[], int count) -{ - struct rb_node *node; - struct extent_state *state = NULL; - u64 start; - - spin_lock(&tree->lock); - do { - start = __btrfs_get_bio_offset(bio, bio_index); - if (state == NULL || state->start != start) { - node = tree_search(tree, start); - BUG_ON(!node); - - state = rb_entry(node, struct extent_state, rb_node); - BUG_ON(state->start != start); - } - state->private = *csums++; - count--; - bio_index++; - - state = next_state(state); - } while (count); - spin_unlock(&tree->lock); -} - int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) { struct rb_node *node; @@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) EXTENT_LOCKED); spin_unlock(&BTRFS_I(inode)->io_tree.lock); - if (state && state->start == failrec->start) { + if (state && state->start <= failrec->start && + state->end >= failrec->start + failrec->len - 1) { fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); @@ -2201,9 +2118,9 @@ out: * needed */ -static int bio_readpage_error(struct bio *failed_bio, struct page *page, - u64 start, u64 end, int failed_mirror, - struct extent_state *state) +static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int failed_mirror) { struct io_failure_record *failrec = NULL; u64 private; @@ -2213,6 +2130,8 @@ static int 
bio_readpage_error(struct bio *failed_bio, struct page *page, struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct bio *bio; + struct btrfs_io_bio *btrfs_failed_bio; + struct btrfs_io_bio *btrfs_bio; int num_copies; int ret; int read_mode; @@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, * all the retry and error correction code that follows. no * matter what the error is, it is very likely to persist. */ - pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " - "state=%p, num_copies=%d, next_mirror %d, " - "failed_mirror %d\n", state, num_copies, - failrec->this_mirror, failed_mirror); + pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", + num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; } - if (!state) { - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&tree->lock); - } - /* * there are two premises: * a) deliver good data to the caller @@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, read_mode = READ_SYNC; } - if (!state || failrec->this_mirror > num_copies) { - pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " - "next_mirror %d, failed_mirror %d\n", state, + if (failrec->this_mirror > num_copies) { + pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", num_copies, failrec->this_mirror, failed_mirror); free_io_failure(inode, failrec, 0); return -EIO; @@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, free_io_failure(inode, failrec, 0); return -EIO; } - bio->bi_private = state; bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; bio->bi_bdev = 
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio->bi_size = 0; + btrfs_failed_bio = btrfs_io_bio(failed_bio); + if (btrfs_failed_bio->csum) { + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = btrfs_bio->csum_inline; + phy_offset >>= inode->i_sb->s_blocksize_bits; + phy_offset *= csum_size; + memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, + csum_size); + } + bio_add_page(bio, page, failrec->len, start - page_offset(page)); pr_debug("bio_readpage_error: submitting new read[%#x] to " @@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err) bio_put(bio); } +static void +endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, + int uptodate) +{ + struct extent_state *cached = NULL; + u64 end = start + len - 1; + + if (uptodate && tree->track_uptodate) + set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); + unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); +} + /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; + struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; + u64 offset = 0; u64 start; u64 end; + u64 len; + u64 extent_start = 0; + u64 extent_len = 0; int mirror; int ret; @@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) do { struct page *page = bvec->bv_page; - struct extent_state *cached = NULL; - struct extent_state *state; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " @@ -2500,37 +2433,32 @@ static void 
end_bio_extent_readpage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; + len = bvec->bv_len; if (++bvec <= bvec_end) prefetchw(&bvec->bv_page->flags); - spin_lock(&tree->lock); - state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED); - if (state && state->start == start) { - /* - * take a reference on the state, unlock will drop - * the ref - */ - cache_state(state, &cached); - } - spin_unlock(&tree->lock); - mirror = io_bio->mirror_num; - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end, - state, mirror); + if (likely(uptodate && tree->ops && + tree->ops->readpage_end_io_hook)) { + ret = tree->ops->readpage_end_io_hook(io_bio, offset, + page, start, end, + mirror); if (ret) uptodate = 0; else clean_io_failure(start, page); } - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { + if (likely(uptodate)) + goto readpage_ok; + + if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !err && test_bit(BIO_UPTODATE, &bio->bi_flags)) uptodate = 1; - } else if (!uptodate) { + } else { /* * The generic bio_readpage_error handles errors the * following way: If possible, new read requests are @@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. 
*/ - ret = bio_readpage_error(bio, page, start, end, mirror, NULL); + ret = bio_readpage_error(bio, offset, page, start, end, + mirror); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; - uncache_state(&cached); continue; } } - - if (uptodate && tree->track_uptodate) { - set_extent_uptodate(tree, start, end, &cached, - GFP_ATOMIC); - } - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); - - if (uptodate) { +readpage_ok: + if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; @@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err) SetPageError(page); } unlock_page(page); + offset += len; + + if (unlikely(!uptodate)) { + if (extent_len) { + endio_readpage_release_extent(tree, + extent_start, + extent_len, 1); + extent_start = 0; + extent_len = 0; + } + endio_readpage_release_extent(tree, start, + end - start + 1, 0); + } else if (!extent_len) { + extent_start = start; + extent_len = end + 1 - start; + } else if (extent_start + extent_len == start) { + extent_len += end + 1 - start; + } else { + endio_readpage_release_extent(tree, extent_start, + extent_len, uptodate); + extent_start = start; + extent_len = end + 1 - start; + } } while (bvec <= bvec_end); + if (extent_len) + endio_readpage_release_extent(tree, extent_start, extent_len, + uptodate); + if (io_bio->end_io) + io_bio->end_io(io_bio, err); bio_put(bio); } @@ -2586,6 +2536,7 @@ struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags) { + struct btrfs_io_bio *btrfs_bio; struct bio *bio; bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); @@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, bio->bi_size = 0; bio->bi_bdev = bdev; bio->bi_sector = first_sector; + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + 
btrfs_bio->end_io = NULL; } return bio; } @@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) /* this also allocates from the btrfs_bioset */ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + struct btrfs_io_bio *btrfs_bio; + struct bio *bio; + + bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); + if (bio) { + btrfs_bio = btrfs_io_bio(bio); + btrfs_bio->csum = NULL; + btrfs_bio->csum_allocated = NULL; + btrfs_bio->end_io = NULL; + } + return bio; } @@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page) } } +static struct extent_map * +__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, + u64 start, u64 len, get_extent_t *get_extent, + struct extent_map **em_cached) +{ + struct extent_map *em; + + if (em_cached && *em_cached) { + em = *em_cached; + if (em->in_tree && start >= em->start && + start < extent_map_end(em)) { + atomic_inc(&em->refs); + return em; + } + + free_extent_map(em); + *em_cached = NULL; + } + + em = get_extent(inode, page, pg_offset, start, len, 0); + if (em_cached && !IS_ERR_OR_NULL(em)) { + BUG_ON(*em_cached); + atomic_inc(&em->refs); + *em_cached = em; + } + return em; +} /* * basic readpage implementation. 
Locked extent state structs are inserted * into the tree that are removed when the IO is done (by the end_io * handlers) * XXX JDM: This needs looking at to ensure proper page locking */ -static int __extent_read_full_page(struct extent_io_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) +static int __do_readpage(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; - struct btrfs_ordered_extent *ordered; int ret; int nr = 0; + int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED; size_t pg_offset = 0; size_t iosize; size_t disk_io_size; size_t blocksize = inode->i_sb->s_blocksize; - unsigned long this_bio_flag = 0; + unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; set_page_extent_mapped(page); + end = page_end; if (!PageUptodate(page)) { if (cleancache_get_page(page) == 0) { BUG_ON(blocksize != PAGE_SIZE); + unlock_extent(tree, start, end); goto out; } } - end = page_end; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_extent(inode, start); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } - if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); @@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree, kunmap_atomic(userpage); set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); - unlock_extent_cached(tree, cur, cur + iosize - 1, - &cached, GFP_NOFS); + if 
(!parent_locked) + unlock_extent_cached(tree, cur, + cur + iosize - 1, + &cached, GFP_NOFS); break; } - em = get_extent(inode, page, pg_offset, cur, - end - cur + 1, 0); + em = __get_extent_map(inode, page, pg_offset, cur, + end - cur + 1, get_extent, em_cached); if (IS_ERR_OR_NULL(em)) { SetPageError(page); - unlock_extent(tree, cur, end); + if (!parent_locked) + unlock_extent(tree, cur, end); break; } extent_offset = cur - em->start; @@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, BUG_ON(end < cur); if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - this_bio_flag = EXTENT_BIO_COMPRESSED; + this_bio_flag |= EXTENT_BIO_COMPRESSED; extent_set_compress_type(&this_bio_flag, em->compress_type); } @@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, */ if (block_start == EXTENT_MAP_INLINE) { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); cur = cur + iosize; pg_offset += iosize; continue; @@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, *bio_flags = this_bio_flag; } else { SetPageError(page); - unlock_extent(tree, cur, cur + iosize - 1); + if (!parent_locked) + unlock_extent(tree, cur, cur + iosize - 1); } cur = cur + iosize; pg_offset += iosize; @@ -2919,6 +2909,104 @@ out: return 0; } +static inline void __do_contiguous_readpages(struct extent_io_tree *tree, + struct page *pages[], int nr_pages, + u64 start, u64 end, + get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long 
*bio_flags, int rw) +{ + struct inode *inode; + struct btrfs_ordered_extent *ordered; + int index; + + inode = pages[0]->mapping->host; + while (1) { + lock_extent(tree, start, end); + ordered = btrfs_lookup_ordered_range(inode, start, + end - start + 1); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + for (index = 0; index < nr_pages; index++) { + __do_readpage(tree, pages[index], get_extent, em_cached, bio, + mirror_num, bio_flags, rw); + page_cache_release(pages[index]); + } +} + +static void __extent_readpages(struct extent_io_tree *tree, + struct page *pages[], + int nr_pages, get_extent_t *get_extent, + struct extent_map **em_cached, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + u64 start = 0; + u64 end = 0; + u64 page_start; + int index; + int first_index = 0; + + for (index = 0; index < nr_pages; index++) { + page_start = page_offset(pages[index]); + if (!end) { + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } else if (end + 1 == page_start) { + end += PAGE_CACHE_SIZE; + } else { + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, + bio, mirror_num, bio_flags, + rw); + start = page_start; + end = start + PAGE_CACHE_SIZE - 1; + first_index = index; + } + } + + if (end) + __do_contiguous_readpages(tree, &pages[first_index], + index - first_index, start, + end, get_extent, em_cached, bio, + mirror_num, bio_flags, rw); +} + +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio, int mirror_num, + unsigned long *bio_flags, int rw) +{ + struct inode *inode = page->mapping->host; + struct btrfs_ordered_extent *ordered; + u64 start = page_offset(page); + u64 end = start + PAGE_CACHE_SIZE - 1; + int ret; + + while (1) { + lock_extent(tree, start, end); + 
ordered = btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } + + ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, + bio_flags, rw); + return ret; +} + int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num) { @@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, return ret; } +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num) +{ + struct bio *bio = NULL; + unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED; + int ret; + + ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, + &bio_flags, READ); + if (bio) + ret = submit_one_bio(READ, bio, mirror_num, bio_flags); + return ret; +} + static noinline void update_nr_written(struct page *page, struct writeback_control *wbc, unsigned long nr_written) @@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (!PageWriteback(page)) { printk(KERN_ERR "btrfs warning page %lu not " "writeback, cur %llu end %llu\n", - page->index, (unsigned long long)cur, - (unsigned long long)end); + page->index, cur, end); } ret = submit_extent_page(write_flags, tree, page, @@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree, unsigned long bio_flags = 0; struct page *pagepool[16]; struct page *page; - int i = 0; + struct extent_map *em_cached = NULL; int nr = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + __extent_readpages(tree, 
pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); nr = 0; } - for (i = 0; i < nr; i++) { - __extent_read_full_page(tree, pagepool[i], get_extent, - &bio, 0, &bio_flags, READ); - page_cache_release(pagepool[i]); - } + if (nr) + __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, + &bio, 0, &bio_flags, READ); + + if (em_cached) + free_extent_map(em_cached); BUG_ON(!list_empty(pages)); if (bio) @@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(&eb->io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || + test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb->start, eb->len); + index = start_idx + num_pages; + if (start_idx >= index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page && mapped) { + spin_lock(&page->mapping->private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) && + page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* + * We need to make sure we haven't be attached + * to a new eb. 
+ */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } + spin_unlock(&page->mapping->private_lock); + + } + if (page) { + /* One for when we alloced the page */ + page_cache_release(page); + } + } while (index != start_idx); +} + +/* + * Helper for releasing the extent buffer. + */ +static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) +{ + btrfs_release_extent_buffer_page(eb, 0); + __free_extent_buffer(eb); +} + static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len, @@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); + new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); if (new == NULL) return NULL; for (i = 0; i < num_pages; i++) { - p = alloc_page(GFP_ATOMIC); - BUG_ON(!p); + p = alloc_page(GFP_NOFS); + if (!p) { + btrfs_release_extent_buffer(new); + return NULL; + } attach_extent_buffer_page(new, p); WARN_ON(PageDirty(p)); SetPageUptodate(p); @@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!eb) return NULL; for (i = 0; i < num_pages; i++) { - eb->pages[i] = alloc_page(GFP_ATOMIC); + eb->pages[i] = alloc_page(GFP_NOFS); if (!eb->pages[i]) goto err; } @@ -4231,76 +4403,6 @@ err: return NULL; } -static int extent_buffer_under_io(struct extent_buffer *eb) -{ - return (atomic_read(&eb->io_pages) || - test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || - test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); -} - -/* - * Helper for releasing extent buffer page. 
- */ -static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, - unsigned long start_idx) -{ - unsigned long index; - unsigned long num_pages; - struct page *page; - int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); - - BUG_ON(extent_buffer_under_io(eb)); - - num_pages = num_extent_pages(eb->start, eb->len); - index = start_idx + num_pages; - if (start_idx >= index) - return; - - do { - index--; - page = extent_buffer_page(eb, index); - if (page && mapped) { - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - page_cache_release(page); - } - spin_unlock(&page->mapping->private_lock); - - } - if (page) { - /* One for when we alloced the page */ - page_cache_release(page); - } - } while (index != start_idx); -} - -/* - * Helper for releasing the extent buffer. 
- */ -static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) -{ - btrfs_release_extent_buffer_page(eb, 0); - __free_extent_buffer(eb); -} - static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; @@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, if (start + min_len > eb->len) { WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " - "wanted %lu %lu\n", (unsigned long long)eb->start, - eb->len, start, min_len); + "wanted %lu %lu\n", + eb->start, eb->len, start, min_len); return -EINVAL; } @@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(eb, i); @@ -4936,7 +5038,7 @@ void 
copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, WARN_ON(src->len != dst_len); offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); while (len > 0) { page = extent_buffer_page(dst, i); @@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, while (len > 0) { dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; @@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); + (PAGE_CACHE_SIZE - 1); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3b8c4e26e1da..6dbc645f1f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -29,6 +29,7 @@ */ #define EXTENT_BIO_COMPRESSED 1 #define EXTENT_BIO_TREE_LOG 2 +#define EXTENT_BIO_PARENT_LOCKED 4 #define EXTENT_BIO_FLAG_SHIFT 16 /* these are bit numbers for test/set bit */ @@ -44,14 +45,11 @@ #define EXTENT_BUFFER_DUMMY 9 /* these are flags for extent_clear_unlock_delalloc */ -#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 -#define EXTENT_CLEAR_UNLOCK 0x2 -#define EXTENT_CLEAR_DELALLOC 0x4 -#define EXTENT_CLEAR_DIRTY 0x8 -#define EXTENT_SET_WRITEBACK 0x10 -#define EXTENT_END_WRITEBACK 0x20 -#define EXTENT_SET_PRIVATE2 0x40 -#define EXTENT_CLEAR_ACCOUNTING 0x80 +#define PAGE_UNLOCK (1 << 0) +#define PAGE_CLEAR_DIRTY (1 << 1) +#define 
PAGE_SET_WRITEBACK (1 << 2) +#define PAGE_END_WRITEBACK (1 << 3) +#define PAGE_SET_PRIVATE2 (1 << 4) /* * page->private values. Every page that is controlled by the extent @@ -62,6 +60,7 @@ struct extent_state; struct btrfs_root; +struct btrfs_io_bio; typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, struct bio *bio, int mirror_num, @@ -77,8 +76,9 @@ struct extent_io_ops { size_t size, struct bio *bio, unsigned long bio_flags); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror); + int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, @@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); +int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); void extent_io_exit(void); @@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree, get_extent_t get_extent); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent); -int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); -void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[], - int count); -void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, - int bvec_index, u32 csums[], int count); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 
void set_page_extent_mapped(struct page *page); @@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_clear_unlock_delalloc(struct inode *inode, - struct extent_io_tree *tree, - u64 start, u64 end, struct page *locked_page, - unsigned long op); +int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, + struct page *locked_page, + unsigned long bits_to_clear, + unsigned long page_ops); struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a7bfc9541803..4f53159bdb9d 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -23,6 +23,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "volumes.h" #include "print-tree.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ @@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } +static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err) +{ + kfree(bio->csum_allocated); +} + static int __btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 logical_offset, u32 *dst, int dio) { - u32 sum[16]; - int len; struct bio_vec *bvec = bio->bi_io_vec; - int bio_index = 0; + struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); + struct btrfs_csum_item *item = NULL; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_path *path; + u8 *csum; u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; u32 diff; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int nblocks; + int bio_index = 0; int count; - struct btrfs_path *path; - struct 
btrfs_csum_item *item = NULL; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); if (!path) return -ENOMEM; + + nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits; + if (!dst) { + if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { + btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, + GFP_NOFS); + if (!btrfs_bio->csum_allocated) { + btrfs_free_path(path); + return -ENOMEM; + } + btrfs_bio->csum = btrfs_bio->csum_allocated; + btrfs_bio->end_io = btrfs_io_bio_endio_readpage; + } else { + btrfs_bio->csum = btrfs_bio->csum_inline; + } + csum = btrfs_bio->csum; + } else { + csum = (u8 *)dst; + } + if (bio->bi_size > PAGE_CACHE_SIZE * 8) path->reada = 2; @@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, if (dio) offset = logical_offset; while (bio_index < bio->bi_vcnt) { - len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index); if (!dio) offset = page_offset(bvec->bv_page) + bvec->bv_offset; - count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, - len); + count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, + (u32 *)csum, nblocks); if (count) goto found; @@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, path, disk_bytenr, 0); if (IS_ERR(item)) { count = 1; - sum[0] = 0; + memset(csum, 0, csum_size); if (BTRFS_I(inode)->root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { set_extent_bits(io_tree, offset, @@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, } else { printk(KERN_INFO "btrfs no csum found " "for inode %llu start %llu\n", - (unsigned long long) - btrfs_ino(inode), - (unsigned long long)offset); + btrfs_ino(inode), offset); } item = NULL; btrfs_release_path(path); @@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, diff = disk_bytenr - item_start_offset; diff = diff / root->sectorsize; diff = 
diff * csum_size; - count = min_t(int, len, (item_last_offset - disk_bytenr) >> - inode->i_sb->s_blocksize_bits); - read_extent_buffer(path->nodes[0], sum, + count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >> + inode->i_sb->s_blocksize_bits); + read_extent_buffer(path->nodes[0], csum, ((unsigned long)item) + diff, csum_size * count); found: - if (dst) { - memcpy(dst, sum, count * csum_size); - dst += count; - } else { - if (dio) - extent_cache_csums_dio(io_tree, offset, sum, - count); - else - extent_cache_csums(io_tree, bio, bio_index, sum, - count); - } + csum += count * csum_size; + nblocks -= count; while (count--) { disk_bytenr += bvec->bv_len; offset += bvec->bv_len; @@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, } int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, - struct bio *bio, u64 offset) + struct btrfs_dio_private *dip, struct bio *bio, + u64 offset) { - return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); + int len = (bio->bi_sector << 9) - dip->disk_bytenr; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int ret; + + len >>= inode->i_sb->s_blocksize_bits; + len *= csum_size; + + ret = __btrfs_lookup_bio_sums(root, inode, bio, offset, + (u32 *)(dip->csum + len), 1); + return ret; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4d2eb6417145..bc5072b2db53 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1334,7 +1334,6 @@ fail: static noinline int check_can_nocow(struct inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; @@ -1356,16 +1355,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, btrfs_put_ordered_extent(ordered); } - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { 
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - return PTR_ERR(trans); - } - num_bytes = lockend - lockstart + 1; - ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, - NULL); - btrfs_end_transaction(trans, root); + ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; } else { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b21a3cd667d8..3f0ddfce96e6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -221,12 +221,10 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_path *path, struct inode *inode) { - loff_t oldsize; int ret = 0; - oldsize = i_size_read(inode); btrfs_i_size_write(inode, 0); - truncate_pagecache(inode, oldsize, 0); + truncate_pagecache(inode, 0); /* * We don't need an orphan item because truncating the free space cache @@ -308,7 +306,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl) static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) { - BUG_ON(io_ctl->index >= io_ctl->num_pages); + ASSERT(io_ctl->index < io_ctl->num_pages); io_ctl->page = io_ctl->pages[io_ctl->index++]; io_ctl->cur = kmap(io_ctl->page); io_ctl->orig = io_ctl->cur; @@ -673,8 +671,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, btrfs_err(root->fs_info, "free space inode generation (%llu) " "did not match free space cache generation (%llu)", - (unsigned long long)BTRFS_I(inode)->generation, - (unsigned long long)generation); + BTRFS_I(inode)->generation, generation); return 0; } @@ -729,7 +726,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, goto free_cache; } } else { - BUG_ON(!num_bitmaps); + ASSERT(num_bitmaps); num_bitmaps--; e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { @@ -1029,7 +1026,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, leaf = path->nodes[0]; if (ret > 0) 
{ struct btrfs_key found_key; - BUG_ON(!path->slots[0]); + ASSERT(path->slots[0]); path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || @@ -1117,7 +1114,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, u64 offset) { - BUG_ON(offset < bitmap_start); + ASSERT(offset >= bitmap_start); offset -= bitmap_start; return (unsigned long)(div_u64(offset, unit)); } @@ -1272,7 +1269,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, if (n) { entry = rb_entry(n, struct btrfs_free_space, offset_index); - BUG_ON(entry->offset > offset); + ASSERT(entry->offset <= offset); } else { if (fuzzy) return entry; @@ -1336,7 +1333,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, { int ret = 0; - BUG_ON(!info->bitmap && !info->bytes); + ASSERT(info->bytes || info->bitmap); ret = tree_insert_offset(&ctl->free_space_offset, info->offset, &info->offset_index, (info->bitmap != NULL)); if (ret) @@ -1359,7 +1356,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) max_bitmaps = max(max_bitmaps, 1); - BUG_ON(ctl->total_bitmaps > max_bitmaps); + ASSERT(ctl->total_bitmaps <= max_bitmaps); /* * The goal is to keep the total amount of memory used per 1gb of space @@ -1403,7 +1400,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_clear(info->bitmap, start, count); @@ -1426,7 +1423,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - BUG_ON(start + count > BITS_PER_BITMAP); + ASSERT(start + count <= BITS_PER_BITMAP); bitmap_set(info->bitmap, start, count); @@ -1742,7 +1739,7 @@ 
no_cluster_bitmap: bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!bitmap_info) { - BUG_ON(added); + ASSERT(added == 0); goto new_bitmap; } @@ -1882,7 +1879,7 @@ out: if (ret) { printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); - BUG_ON(ret == -EEXIST); + ASSERT(ret != -EEXIST); } return ret; @@ -1991,8 +1988,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, if (info->bytes >= bytes && !block_group->ro) count++; printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, + info->offset, info->bytes, (info->bitmap) ? "yes" : "no"); } printk(KERN_INFO "block group has cluster?: %s\n", @@ -2371,7 +2367,7 @@ again: rb_erase(&entry->offset_index, &ctl->free_space_offset); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 1); - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ trace_btrfs_setup_cluster(block_group, cluster, total_found * ctl->unit, 1); @@ -2464,7 +2460,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 0); total_size += entry->bytes; - BUG_ON(ret); /* -EEXIST; Logic error */ + ASSERT(!ret); /* -EEXIST; Logic error */ } while (node && entry != last); cluster->max_size = max_extent; @@ -2525,8 +2521,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, * returns zero and sets up cluster if things worked out, otherwise * it returns -enospc */ -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) @@ -2856,7 +2851,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) ret = search_bitmap(ctl, entry, &offset, &count); /* 
Logic error; Should be empty if it can't find anything */ - BUG_ON(ret); + ASSERT(!ret); ino = offset; bitmap_clear_bits(ctl, entry, offset, 1); @@ -2973,33 +2968,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -static struct btrfs_block_group_cache *init_test_block_group(void) +/* + * Use this if you need to make a bitmap or extent entry specifically, it + * doesn't do any of the merging that add_free_space does, this acts a lot like + * how the free space cache loading stuff works, so you can get really weird + * configurations. + */ +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap) { - struct btrfs_block_group_cache *cache; + struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; + struct btrfs_free_space *info = NULL, *bitmap_info; + void *map = NULL; + u64 bytes_added; + int ret; - cache = kzalloc(sizeof(*cache), GFP_NOFS); - if (!cache) - return NULL; - cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), - GFP_NOFS); - if (!cache->free_space_ctl) { - kfree(cache); - return NULL; +again: + if (!info) { + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); + if (!info) + return -ENOMEM; } - cache->key.objectid = 0; - cache->key.offset = 1024 * 1024 * 1024; - cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; + if (!bitmap) { + spin_lock(&ctl->tree_lock); + info->offset = offset; + info->bytes = bytes; + ret = link_free_space(ctl, info); + spin_unlock(&ctl->tree_lock); + if (ret) + kmem_cache_free(btrfs_free_space_cachep, info); + return ret; + } + + if (!map) { + map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!map) { + kmem_cache_free(btrfs_free_space_cachep, info); + return -ENOMEM; + } + } + + spin_lock(&ctl->tree_lock); + bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), + 1, 0); + if (!bitmap_info) { + info->bitmap = map; + map = NULL; + add_new_bitmap(ctl, info, offset); + 
bitmap_info = info; + } - spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->list); - INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->new_bg_list); + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + spin_unlock(&ctl->tree_lock); - btrfs_init_free_space_ctl(cache); + if (bytes) + goto again; - return cache; + if (map) + kfree(map); + return 0; } /* @@ -3007,8 +3037,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void) * just used to check the absence of space, so if there is free space in the * range at all we will return 1. */ -static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, - u64 bytes) +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) { struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; struct btrfs_free_space *info; @@ -3085,411 +3115,4 @@ out: spin_unlock(&ctl->tree_lock); return ret; } - -/* - * Use this if you need to make a bitmap or extent entry specifically, it - * doesn't do any of the merging that add_free_space does, this acts a lot like - * how the free space cache loading stuff works, so you can get really weird - * configurations. 
- */ -static int add_free_space_entry(struct btrfs_block_group_cache *cache, - u64 offset, u64 bytes, bool bitmap) -{ - struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; - struct btrfs_free_space *info = NULL, *bitmap_info; - void *map = NULL; - u64 bytes_added; - int ret; - -again: - if (!info) { - info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); - if (!info) - return -ENOMEM; - } - - if (!bitmap) { - spin_lock(&ctl->tree_lock); - info->offset = offset; - info->bytes = bytes; - ret = link_free_space(ctl, info); - spin_unlock(&ctl->tree_lock); - if (ret) - kmem_cache_free(btrfs_free_space_cachep, info); - return ret; - } - - if (!map) { - map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!map) { - kmem_cache_free(btrfs_free_space_cachep, info); - return -ENOMEM; - } - } - - spin_lock(&ctl->tree_lock); - bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), - 1, 0); - if (!bitmap_info) { - info->bitmap = map; - map = NULL; - add_new_bitmap(ctl, info, offset); - bitmap_info = info; - } - - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); - bytes -= bytes_added; - offset += bytes_added; - spin_unlock(&ctl->tree_lock); - - if (bytes) - goto again; - - if (map) - kfree(map); - return 0; -} - -#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__) - -/* - * This test just does basic sanity checking, making sure we can add an exten - * entry and remove space from either end and the middle, and make sure we can - * remove space that covers adjacent extent entries. 
- */ -static int test_extents(struct btrfs_block_group_cache *cache) -{ - int ret = 0; - - test_msg("Running extent only tests\n"); - - /* First just make sure we can remove an entire entry */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding initial extents %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing extent %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Full remove left some lingering space\n"); - return -1; - } - - /* Ok edge and middle cases now */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error adding half extent %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing tail end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Error removing front end %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); - if (ret) { - test_msg("Error removing middle piece %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Still have space at the front\n"); - return -1; - } - - if (check_exists(cache, 2 * 1024 * 1024, 4096)) { - test_msg("Still have space in the middle\n"); - return -1; - } - - if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Still have space at the end\n"); - return -1; - } - - /* Cleanup */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -static int test_bitmaps(struct btrfs_block_group_cache *cache) -{ - u64 next_bitmap_offset; - int ret; - - test_msg("Running bitmap only tests\n"); - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create a bitmap entry %d\n", ret); - return ret; - } - - ret = 
btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap full range %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 4 * 1024 * 1024)) { - test_msg("Left some space in bitmap\n"); - return -1; - } - - ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to our bitmap entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove middle chunk %d\n", ret); - return ret; - } - - /* - * The first bitmap we have starts at offset 0 so the next one is just - * at the end of the first bitmap. - */ - next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - - /* Test a bit straddling two bitmaps */ - ret = add_free_space_entry(cache, next_bitmap_offset - - (2 * 1024 * 1024), 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space that straddles two bitmaps %d\n", - ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, next_bitmap_offset - - (1 * 1024 * 1024), 2 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), - 2 * 1024 * 1024)) { - test_msg("Left some space when removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - return 0; -} - -/* This is the high grade jackassery */ -static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) -{ - u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); - int ret; - - test_msg("Running bitmap and extent tests\n"); - - /* - * First let's do something simple, an extent at the same offset as the - * bitmap, but the free space completely in the extent and then - * completely in the bitmap. 
- */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't create bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove extent entry %d\n", ret); - return ret; - } - - if (check_exists(cache, 0, 1 * 1024 * 1024)) { - test_msg("Left remnants after our remove\n"); - return -1; - } - - /* Now to add back the extent entry and remove from the bitmap */ - ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't re-add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove from bitmap %d\n", ret); - return ret; - } - - if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { - test_msg("Left remnants in the bitmap\n"); - return -1; - } - - /* - * Ok so a little more evil, extent entry and bitmap at the same offset, - * removing an overlapping chunk. 
- */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add to a bitmap %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Couldn't remove overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { - test_msg("Left over peices after removing overlapping\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* Now with the extent entry offset into the bitmap */ - ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add space to the bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent to the cache %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); - if (ret) { - test_msg("Problem removing overlapping space %d\n", ret); - return ret; - } - - if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { - test_msg("Left something behind when removing space"); - return -1; - } - - /* - * This has blown up in the past, the extent entry starts before the - * bitmap entry, but we're trying to remove an offset that falls - * completely within the bitmap range and is in both the extent entry - * and the bitmap entry, looks like this - * - * [ extent ] - * [ bitmap ] - * [ del ] - */ - __btrfs_remove_free_space_cache(cache->free_space_ctl); - ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, - 4 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, - 5 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, - 5 
* 1024 * 1024); - if (ret) { - test_msg("Failed to free our space %d\n", ret); - return ret; - } - - if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024)) { - test_msg("Left stuff over\n"); - return -1; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - - /* - * This blew up before, we have part of the free space in a bitmap and - * then the entirety of the rest of the space in an extent. This used - * to return -EAGAIN back from btrfs_remove_extent, make sure this - * doesn't happen. - */ - ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); - if (ret) { - test_msg("Couldn't add bitmap entry %d\n", ret); - return ret; - } - - ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); - if (ret) { - test_msg("Couldn't add extent entry %d\n", ret); - return ret; - } - - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); - if (ret) { - test_msg("Error removing bitmap and extent overlapping %d\n", ret); - return ret; - } - - __btrfs_remove_free_space_cache(cache->free_space_ctl); - return 0; -} - -void btrfs_test_free_space_cache(void) -{ - struct btrfs_block_group_cache *cache; - - test_msg("Running btrfs free space cache tests\n"); - - cache = init_test_block_group(); - if (!cache) { - test_msg("Couldn't run the tests\n"); - return; - } - - if (test_extents(cache)) - goto out; - if (test_bitmaps(cache)) - goto out; - if (test_bitmaps_and_extents(cache)) - goto out; -out: - __btrfs_remove_free_space_cache(cache->free_space_ctl); - kfree(cache->free_space_ctl); - kfree(cache); - test_msg("Free space cache tests finished\n"); -} -#undef test_msg -#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ -void btrfs_test_free_space_cache(void) {} -#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ +#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 894116b71304..c74904167476 100644 --- a/fs/btrfs/free-space-cache.h +++ 
b/fs/btrfs/free-space-cache.h @@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, u64 bytes); -int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_find_space_cluster(struct btrfs_root *root, struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size); @@ -113,6 +112,12 @@ int btrfs_return_cluster_to_free_space( int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, u64 *trimmed, u64 start, u64 end, u64 minlen); -void btrfs_test_free_space_cache(void); +/* Support functions for runnint our sanity tests */ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +int test_add_free_space_entry(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes, bool bitmap); +int test_check_exists(struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes); +#endif #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7bdc83d04d54..f338c5672d58 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -230,12 +230,13 @@ fail: * does the checks required to make sure the data is small enough * to fit as an inline extent. 
*/ -static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, u64 start, u64 end, - size_t compressed_size, int compress_type, - struct page **compressed_pages) +static noinline int cow_file_range_inline(struct btrfs_root *root, + struct inode *inode, u64 start, + u64 end, size_t compressed_size, + int compress_type, + struct page **compressed_pages) { + struct btrfs_trans_handle *trans; u64 isize = i_size_read(inode); u64 actual_end = min(end + 1, isize); u64 inline_len = actual_end - start; @@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; + ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); - if (ret) - return ret; + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto out; + } if (isize > actual_end) inline_len = min_t(u64, isize, actual_end); @@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, compress_type, compressed_pages); if (ret && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); - return ret; + goto out; } else if (ret == -ENOSPC) { - return 1; + ret = 1; + goto out; } set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); - return 0; +out: + btrfs_end_transaction(trans, root); + return ret; } struct async_extent { @@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode, int *num_added) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; u64 num_bytes; u64 blocksize = root->sectorsize; u64 actual_end; @@ -461,45 +471,36 @@ again: } cont: if (start == 0) { - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - 
ret = PTR_ERR(trans); - trans = NULL; - goto cleanup_and_out; - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - /* lets try to make an inline extent */ if (ret || total_in < (actual_end - start)) { /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, + 0, 0, NULL); } else { /* try making a compressed inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, + ret = cow_file_range_inline(root, inode, start, end, total_compressed, compress_type, pages); } if (ret <= 0) { + unsigned long clear_flags = EXTENT_DELALLOC | + EXTENT_DEFRAG; + clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; + /* * inline extent creation worked or returned error, * we don't need to create any more async work items. * Unlock and free up our temp pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - - btrfs_end_transaction(trans, root); + extent_clear_unlock_delalloc(inode, start, end, NULL, + clear_flags, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); goto free_pages_out; } - btrfs_end_transaction(trans, root); } if (will_compress) { @@ -590,20 +591,6 @@ free_pages_out: kfree(pages); goto out; - -cleanup_and_out: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_DIRTY | - EXTENT_CLEAR_DELALLOC | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - if (!trans || IS_ERR(trans)) - btrfs_error(root->fs_info, ret, "Failed to join transaction"); - else - btrfs_abort_transaction(trans, root, ret); - goto free_pages_out; } /* @@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode, { struct 
async_extent *async_extent; u64 alloc_hint = 0; - struct btrfs_trans_handle *trans; struct btrfs_key ins; struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -678,20 +664,10 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - } else { - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_reserve_extent(trans, root, + ret = btrfs_reserve_extent(root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1); - if (ret && ret != -ENOSPC) - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - } - if (ret) { int i; @@ -770,16 +746,12 @@ retry: /* * clear dirty, set writeback and unlock the pages. */ - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); - + NULL, EXTENT_LOCKED | EXTENT_DELALLOC, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, @@ -798,16 +770,13 @@ out: out_free_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_free: - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - async_extent->start, + extent_clear_unlock_delalloc(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); 
kfree(async_extent); goto again; } @@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, * required to start IO on it. It may be clean and already done with * IO when we return. */ -static noinline int __cow_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, - struct btrfs_root *root, - struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) +static noinline int cow_file_range(struct inode *inode, + struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written, + int unlock) { + struct btrfs_root *root = BTRFS_I(inode)->root; u64 alloc_hint = 0; u64 num_bytes; unsigned long ram_size; @@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, /* if this is a small write inside eof, kick off defrag */ if (num_bytes < 64 * 1024 && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) - btrfs_add_inode_defrag(trans, inode); + btrfs_add_inode_defrag(NULL, inode); if (start == 0) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(trans, root, inode, - start, end, 0, 0, NULL); + ret = cow_file_range_inline(root, inode, start, end, 0, 0, + NULL); if (ret == 0) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, NULL, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; goto out; } else if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); goto out_unlock; } } @@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, unsigned long 
op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(trans, root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, root, ret); + if (ret < 0) goto out_unlock; - } em = alloc_extent_map(); if (!em) { @@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); - if (ret) { - btrfs_abort_transaction(trans, root, ret); + if (ret) goto out_reserve; - } } if (disk_num_bytes < cur_alloc_size) @@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, * Do set the Private2 bit so we know this page was properly * setup for writepage */ - op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; - op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2; + op = unlock ? PAGE_UNLOCK : 0; + op |= PAGE_SET_PRIVATE2; - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, start + ram_size - 1, - locked_page, op); + extent_clear_unlock_delalloc(inode, start, + start + ram_size - 1, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC, + op); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -1008,52 +967,14 @@ out: out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset); out_unlock: - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | + EXTENT_DELALLOC | EXTENT_DEFRAG, + PAGE_UNLOCK | PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); goto out; } -static noinline int cow_file_range(struct inode *inode, - struct 
page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written, - int unlock) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - return PTR_ERR(trans); - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - - ret = __cow_file_range(trans, inode, root, locked_page, start, end, - page_started, nr_written, unlock); - - btrfs_end_transaction(trans, root); - - return ret; -} - /* * work queue call back to started compression on a file and pages */ @@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, path = btrfs_alloc_path(); if (!path) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); return -ENOMEM; } @@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); + extent_clear_unlock_delalloc(inode, start, end, locked_page, + EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, PAGE_UNLOCK | + 
PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1369,9 +1286,9 @@ out_check: btrfs_release_path(path); if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, found_key.offset - 1, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, + cow_start, found_key.offset - 1, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -1428,11 +1345,11 @@ out_check: } } - extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - cur_offset, cur_offset + num_bytes - 1, - locked_page, EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_SET_PRIVATE2); + extent_clear_unlock_delalloc(inode, cur_offset, + cur_offset + num_bytes - 1, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC, PAGE_UNLOCK | + PAGE_SET_PRIVATE2); cur_offset = extent_end; if (cur_offset > end) break; @@ -1445,9 +1362,8 @@ out_check: } if (cow_start != (u64)-1) { - ret = __cow_file_range(trans, inode, root, locked_page, - cow_start, end, - page_started, nr_written, 1); + ret = cow_file_range(inode, locked_page, cow_start, end, + page_started, nr_written, 1); if (ret) { btrfs_abort_transaction(trans, root, ret); goto error; @@ -1460,16 +1376,13 @@ error: ret = err; if (ret && cur_offset < end) - extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - cur_offset, end, locked_page, - EXTENT_CLEAR_UNLOCK_PAGE | - EXTENT_CLEAR_UNLOCK | - EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_DIRTY | - EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK); - + extent_clear_unlock_delalloc(inode, cur_offset, end, + locked_page, EXTENT_LOCKED | + EXTENT_DELALLOC | EXTENT_DEFRAG | + EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | + PAGE_CLEAR_DIRTY | + PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK); btrfs_free_path(path); return ret; } @@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, 
WARN_ON(1); return ret; } + ret = 0; while (1) { cond_resched(); @@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, old->len || extent_offset + num_bytes <= old->extent_offset + old->offset) continue; - - ret = 0; break; } @@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, static int relink_is_mergable(struct extent_buffer *leaf, struct btrfs_file_extent_item *fi, - u64 disk_bytenr) + struct new_sa_defrag_extent *new) { - if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) + if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) return 0; if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) return 0; - if (btrfs_file_extent_compression(leaf, fi) || - btrfs_file_extent_encryption(leaf, fi) || + if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) + return 0; + + if (btrfs_file_extent_encryption(leaf, fi) || btrfs_file_extent_other_encoding(leaf, fi)) return 0; @@ -2391,8 +2305,8 @@ again: struct btrfs_file_extent_item); extent_len = btrfs_file_extent_num_bytes(leaf, fi); - if (relink_is_mergable(leaf, fi, new->bytenr) && - extent_len + found_key.offset == start) { + if (extent_len + found_key.offset == start && + relink_is_mergable(leaf, fi, new)) { btrfs_set_file_extent_num_bytes(leaf, fi, extent_len + len); btrfs_mark_buffer_dirty(leaf); @@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) struct extent_state *cached_state = NULL; struct new_sa_defrag_extent *new = NULL; int compress_type = 0; - int ret; + int ret = 0; + u64 logical_len = ordered_extent->len; bool nolock; + bool truncated = false; nolock = btrfs_is_free_space_inode(inode); @@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { + truncated = true; + logical_len = ordered_extent->truncated_len; + /* 
Truncated the entire extent, don't bother adding */ + if (!logical_len) + goto out; + } + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ btrfs_ordered_update_i_size(inode, 0, ordered_extent); @@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + - ordered_extent->len); + logical_len); } else { BUG_ON(root == root->fs_info->tree_root); ret = insert_reserved_file_extent(trans, inode, ordered_extent->file_offset, ordered_extent->start, ordered_extent->disk_len, - ordered_extent->len, - ordered_extent->len, + logical_len, logical_len, compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); } @@ -2753,17 +2676,27 @@ out: if (trans) btrfs_end_transaction(trans, root); - if (ret) { - clear_extent_uptodate(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, NULL, GFP_NOFS); + if (ret || truncated) { + u64 start, end; + + if (truncated) + start = ordered_extent->file_offset + logical_len; + else + start = ordered_extent->file_offset; + end = ordered_extent->file_offset + ordered_extent->len - 1; + clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); + + /* Drop the cache for the part of the extent we didn't write. */ + btrfs_drop_extent_cache(inode, start, end, 0); /* * If the ordered extent had an IOERR or something else went * wrong we need to return the space for this ordered extent - * back to the allocator. + * back to the allocator. We only free the extent in the + * truncated case if we didn't write out the extent at all. 
*/ - if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + if ((ret || !logical_len) && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) btrfs_free_reserved_extent(root, ordered_extent->start, ordered_extent->disk_len); @@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, * if there's a match, we allow the bio to finish. If not, the code in * extent_io.c will try to find good copies for us. */ -static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state, int mirror) +static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) { size_t offset = start - page_offset(page); struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; - u64 private = ~(u32)0; - int ret; struct btrfs_root *root = BTRFS_I(inode)->root; + u32 csum_expected; u32 csum = ~(u32)0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; } - if (state && state->start == start) { - private = state->private; - ret = 0; - } else { - ret = get_state_private(io_tree, start, &private); - } - kaddr = kmap_atomic(page); - if (ret) - goto zeroit; + phy_offset >>= inode->i_sb->s_blocksize_bits; + csum_expected = *(((u32 *)io_bio->csum) + phy_offset); + kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); btrfs_csum_final(csum, (char *)&csum); - if (csum != private) + if (csum != csum_expected) goto zeroit; kunmap_atomic(kaddr); @@ -2877,14 +2804,12 @@ good: zeroit: if (__ratelimit(&_rs)) - btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", - (unsigned long long)btrfs_ino(page->mapping->host), - 
(unsigned long long)start, csum, - (unsigned long long)private); + btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", + btrfs_ino(page->mapping->host), start, csum, csum_expected); memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr); - if (private == 0) + if (csum_expected == 0) return 0; return -EIO; } @@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, btrfs_root_refs(&root->root_item) > 0) { ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, root->root_key.objectid); - BUG_ON(ret); - root->orphan_item_inserted = 0; + if (ret) + btrfs_abort_transaction(trans, root, ret); + else + root->orphan_item_inserted = 0; } if (block_rsv) { @@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - if (ret && ret != -EEXIST) { - clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); - btrfs_abort_transaction(trans, root, ret); - return ret; + if (ret) { + if (reserve) { + clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); + btrfs_orphan_release_metadata(inode); + } + if (ret != -EEXIST) { + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); + btrfs_abort_transaction(trans, root, ret); + return ret; + } } ret = 0; } @@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (trans && delete_item) { + if (trans && delete_item) ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ - } if (release_rsv) { btrfs_orphan_release_metadata(inode); atomic_dec(&root->orphan_inodes); } - return 0; + return ret; } /* @@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct 
btrfs_root *root) found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); - BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ btrfs_end_transaction(trans, root); + if (ret) + goto out; continue; } @@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) { btrfs_info(root->fs_info, "failed to delete reference to %.*s, inode %llu parent %llu", - name_len, name, - (unsigned long long)ino, (unsigned long long)dir_ino); + name_len, name, ino, dir_ino); btrfs_abort_transaction(trans, root, ret); goto err; } @@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 last_size = (u64)-1; u32 found_type = (u8)-1; int found_extent; int del_item; @@ -4026,6 +3959,11 @@ search_again: if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; + if (del_item) + last_size = found_key.offset; + else + last_size = new_size; + if (extent_type != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); @@ -4137,6 +4075,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: + if (last_size != (u64)-1) + btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; } @@ -4409,7 +4349,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); if (newsize > oldsize) { - truncate_pagecache(inode, oldsize, newsize); + truncate_pagecache(inode, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize); if (ret) return ret; @@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) btrfs_inode_resume_unlocked_dio(inode); ret = btrfs_truncate(inode); - if (ret && inode->i_nlink) - btrfs_orphan_del(NULL, inode); + if (ret && inode->i_nlink) { + int err; + + /* + * failed to truncate, disk_i_size is only adjusted down + * as we remove extents, so it 
should represent the true + * size of the inode, so reset the in memory size and + * delete our orphan entry. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + return ret; + } + i_size_write(inode, BTRFS_I(inode)->disk_i_size); + err = btrfs_orphan_del(trans, inode); + if (err) + btrfs_abort_transaction(trans, root, err); + btrfs_end_transaction(trans, root); + } } return ret; @@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode) btrfs_free_block_rsv(root, rsv); + /* + * Errors here aren't a big deal, it just means we leave orphan items + * in the tree. They will be cleaned up on the next mount. + */ if (ret == 0) { trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_orphan_del(trans, inode); - BUG_ON(ret); + btrfs_orphan_del(trans, inode); + } else { + btrfs_orphan_del(NULL, inode); } trans->block_rsv = &root->fs_info->trans_block_rsv; @@ -6161,10 +6124,7 @@ insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { btrfs_err(root->fs_info, "bad extent! 
em: [%llu %llu] passed [%llu %llu]", - (unsigned long long)em->start, - (unsigned long long)em->len, - (unsigned long long)start, - (unsigned long long)len); + em->start, em->len, start, len); err = -EIO; goto out; } @@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, u64 start, u64 len) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; int ret; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return ERR_CAST(trans); - - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, alloc_hint, &ins, 1); - if (ret) { - em = ERR_PTR(ret); - goto out; - } + if (ret) + return ERR_PTR(ret); em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, ins.offset, ins.offset, ins.offset, 0); - if (IS_ERR(em)) - goto out; + if (IS_ERR(em)) { + btrfs_free_reserved_extent(root, ins.objectid, ins.offset); + return em; + } ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, ins.offset, ins.offset, 0); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset); - em = ERR_PTR(ret); + free_extent_map(em); + return ERR_PTR(ret); } -out: - btrfs_end_transaction(trans, root); + return em; } @@ -6402,11 +6355,11 @@ out: * returns 1 when the nocow is safe, < 1 on error, 0 if the * block must be cow'd */ -noinline int can_nocow_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, +noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes) { + struct btrfs_trans_handle *trans; struct btrfs_path *path; int ret; struct extent_buffer *leaf; @@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, if 
(!path) return -ENOMEM; - ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), offset, 0); if (ret < 0) goto out; @@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans, * look for other files referencing this extent, if we * find any we must cow */ - if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), - key.offset - backref_offset, disk_bytenr)) + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = 0; goto out; + } + + ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), + key.offset - backref_offset, disk_bytenr); + btrfs_end_transaction(trans, root); + if (ret) { + ret = 0; + goto out; + } /* * adjust disk_bytenr and num_bytes to cover just the bytes @@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - struct btrfs_trans_handle *trans; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, len = min(len, em->len - (start - em->start)); block_start = em->block_start + (start - em->start); - /* - * we're not going to log anything, but we do need - * to make sure the current transaction stays open - * while we look for nocow cross refs - */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - goto must_cow; - - if (can_nocow_extent(trans, inode, start, &len, &orig_start, + if (can_nocow_extent(inode, start, &len, &orig_start, &orig_block_len, &ram_bytes) == 1) { if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); @@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, block_start, len, orig_block_len, ram_bytes, type); - if (IS_ERR(em)) { - btrfs_end_transaction(trans, root); + if (IS_ERR(em)) goto unlock_err; - } } ret = 
btrfs_add_ordered_extent_dio(inode, start, block_start, len, len, type); - btrfs_end_transaction(trans, root); if (ret) { free_extent_map(em); goto unlock_err; } goto unlock; } - btrfs_end_transaction(trans, root); } -must_cow: + /* * this will cow the extent, reset the len in case we changed * it above @@ -6813,26 +6762,6 @@ unlock_err: return ret; } -struct btrfs_dio_private { - struct inode *inode; - u64 logical_offset; - u64 disk_bytenr; - u64 bytes; - void *private; - - /* number of bios pending for this dio */ - atomic_t pending_bios; - - /* IO errors */ - int errors; - - /* orig_bio is our btrfs_io_bio */ - struct bio *orig_bio; - - /* dio_bio came from fs/direct-io.c */ - struct bio *dio_bio; -}; - static void btrfs_endio_direct_read(struct bio *bio, int err) { struct btrfs_dio_private *dip = bio->bi_private; @@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct bio *dio_bio; + u32 *csums = (u32 *)dip->csum; + int index = 0; u64 start; start = dip->logical_offset; @@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) struct page *page = bvec->bv_page; char *kaddr; u32 csum = ~(u32)0; - u64 private = ~(u32)0; unsigned long flags; - if (get_state_private(&BTRFS_I(inode)->io_tree, - start, &private)) - goto failed; local_irq_save(flags); kaddr = kmap_atomic(page); csum = btrfs_csum_data(kaddr + bvec->bv_offset, @@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) local_irq_restore(flags); flush_dcache_page(bvec->bv_page); - if (csum != private) { -failed: - btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", - (unsigned long long)btrfs_ino(inode), - (unsigned long long)start, - csum, (unsigned)private); + if (csum != csums[index]) { + btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", + btrfs_ino(inode), start, csum, + 
csums[index]); err = -EIO; } } start += bvec->bv_len; bvec++; + index++; } while (bvec <= bvec_end); unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, @@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (err) { printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " "sector %#Lx len %u err no %d\n", - (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, + btrfs_ino(dip->inode), bio->bi_rw, (unsigned long long)bio->bi_sector, bio->bi_size, err); dip->errors = 1; @@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int rw, u64 file_offset, int skip_sum, int async_submit) { + struct btrfs_dio_private *dip = bio->bi_private; int write = rw & REQ_WRITE; struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, if (ret) goto err; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); + ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, + file_offset); if (ret) goto err; } @@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, bio_put(orig_bio); return -EIO; } + if (map_length >= orig_bio->bi_size) { bio = orig_bio; goto submit; @@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct btrfs_dio_private *dip; struct bio *io_bio; int skip_sum; + int sum_len; int write = rw & REQ_WRITE; int ret = 0; + u16 csum_size; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); - if (!io_bio) { ret = -ENOMEM; goto free_ordered; } - dip = kmalloc(sizeof(*dip), GFP_NOFS); + if (!skip_sum && !write) { + csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; + sum_len *= csum_size; + } else { + sum_len = 0; + } + + dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); if 
(!dip) { ret = -ENOMEM; goto free_io_bio; @@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * whoever cleared the private bit is responsible * for the finish_ordered_io */ - if (TestClearPagePrivate2(page) && - btrfs_dec_test_ordered_pending(inode, &ordered, page_start, - PAGE_CACHE_SIZE, 1)) { - btrfs_finish_ordered_io(ordered); + if (TestClearPagePrivate2(page)) { + struct btrfs_ordered_inode_tree *tree; + u64 new_len; + + tree = &BTRFS_I(inode)->ordered_tree; + + spin_lock_irq(&tree->lock); + set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags); + new_len = page_start - ordered->file_offset; + if (new_len < ordered->truncated_len) + ordered->truncated_len = new_len; + spin_unlock_irq(&tree->lock); + + if (btrfs_dec_test_ordered_pending(inode, &ordered, + page_start, + PAGE_CACHE_SIZE, 1)) + btrfs_finish_ordered_io(ordered); } btrfs_put_ordered_extent(ordered); cached_state = NULL; @@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode) u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); /* * Yes ladies and gentelment, this is indeed ugly. 
The fact is we have @@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode) if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)) { btrfs_info(root->fs_info, "inode %llu still on the orphan list", - (unsigned long long)btrfs_ino(inode)); + btrfs_ino(inode)); atomic_dec(&root->orphan_inodes); } @@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode) break; else { btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", - (unsigned long long)ordered->file_offset, - (unsigned long long)ordered->len); + ordered->file_offset, ordered->len); btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); @@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.name, new_dentry->d_name.len); } - if (!ret && new_inode->i_nlink == 0) { + if (!ret && new_inode->i_nlink == 0) ret = btrfs_orphan_add(trans, new_dentry->d_inode); - BUG_ON(ret); - } if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_fail; @@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); cur_bytes = max(cur_bytes, min_size); - ret = btrfs_reserve_extent(trans, root, cur_bytes, - min_size, 0, *alloc_hint, &ins, 1); + ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, + *alloc_hint, &ins, 1); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 238a05545ee2..1a5b9462dd9a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include <linux/blkdev.h> #include <linux/uuid.h> #include <linux/btrfs.h> +#include <linux/uaccess.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -57,6 +58,9 @@ #include "send.h" #include "dev-replace.h" +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 
destoff); + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) { @@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return 0; } +int btrfs_is_empty_uuid(u8 *uuid) +{ + static char empty_uuid[BTRFS_UUID_SIZE] = {0}; + + return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); +} + static noinline int create_subvol(struct inode *dir, struct dentry *dentry, char *name, int namelen, @@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved); + 8, &qgroup_reserved, false); if (ret) return ret; @@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); - root_item.flags = 0; - root_item.byte_limit = 0; - inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); + btrfs_set_root_flags(&root_item, 
0); + btrfs_set_root_limit(&root_item, 0); + btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_generation(&root_item, trans->transid); @@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir, btrfs_root_generation(&root_item)); uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); - root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); @@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, btrfs_ino(dir), index, name, namelen); - BUG_ON(ret); + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + objectid); + if (ret) + btrfs_abort_transaction(trans, root, ret); + fail: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * 1 - root item * 2 - root ref/backref * 1 - root of snapshot + * 1 - UUID item */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, 7, - &pending_snapshot->qgroup_reserved); + &pending_snapshot->block_rsv, 8, + &pending_snapshot->qgroup_reserved, + false); if (ret) goto out; @@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, cluster = max_cluster; } - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = compress_type; - if (i + cluster > ra_index) { ra_index = max(i, ra_index); btrfs_force_ra(inode->i_mapping, ra, file, ra_index, @@ -1278,6 +1292,8 @@ 
int btrfs_defrag_file(struct inode *inode, struct file *file, } mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = compress_type; ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) { mutex_unlock(&inode->i_mutex); @@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); - - mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; - mutex_unlock(&inode->i_mutex); } if (range->compress_type == BTRFS_COMPRESS_LZO) { @@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ret = defrag_count; out_ra: + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; + mutex_unlock(&inode->i_mutex); + } if (!file) kfree(ra); kfree(pages); @@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); mnt_drop_write_file(file); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - printk(KERN_INFO "btrfs: resizing devid %llu\n", - (unsigned long long)devid); + printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); } device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", - (unsigned long long)devid); + devid); ret = -ENODEV; goto out_free; } @@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " 
"readonly device %llu\n", - (unsigned long long)devid); + devid); ret = -EPERM; goto out_free; } @@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size *= root->sectorsize; printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", - rcu_str_deref(device->name), - (unsigned long long)new_size); + rcu_str_deref(device->name), new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); @@ -1721,13 +1735,28 @@ out: static noinline int may_destroy_subvol(struct btrfs_root *root) { struct btrfs_path *path; + struct btrfs_dir_item *di; struct btrfs_key key; + u64 dir_id; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + /* Make sure this root isn't set as the default subvol */ + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, + dir_id, "default", 7, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.objectid == root->root_key.objectid) { + ret = -ENOTEMPTY; + goto out; + } + btrfs_release_path(path); + } + key.objectid = root->root_key.objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = (u64)-1; @@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; + else if (ret > 0) { + ret = btrfs_previous_item(root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) + goto out; + else if (ret > 0) { + ret = -ENOENT; + goto out; + } + } l = path->nodes[0]; slot = path->slots[0]; - if (ret > 0 && slot > 0) - slot--; btrfs_item_key_to_cpu(l, &key, slot); - if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) { - ret = -ENOENT; - goto out; - } - iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name) + if (ptr < name) { + ret = -ENAMETOOLONG; goto out; + } 
*(ptr + len) = '/'; read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); @@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.offset = (u64)-1; dirid = key.objectid; } - if (ptr < name) - goto out; memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; @@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * ref/backref. */ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 5, &qgroup_reserved); + 5, &qgroup_reserved, true); if (err) goto out_up_write; @@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_end_trans; } } + + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + } + out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) if (!fi_args) return -ENOMEM; + mutex_lock(&fs_devices->device_list_mutex); fi_args->num_devices = fs_devices->num_devices; memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); - 
mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; @@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; int ret = 0; char *s_uuid = NULL; - char empty_uuid[BTRFS_UUID_SIZE] = {0}; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) if (IS_ERR(di_args)) return PTR_ERR(di_args); - if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) + if (!btrfs_is_empty_uuid(di_args->uuid)) s_uuid = di_args->uuid; mutex_lock(&fs_devices->device_list_mutex); @@ -2469,150 +2519,336 @@ out: return ret; } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static struct page *extent_same_get_page(struct inode *inode, u64 off) +{ + struct page *page; + pgoff_t index; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + + index = off >> PAGE_CACHE_SHIFT; + + page = grab_cache_page(inode->i_mapping, index); + if (!page) + return NULL; + + if (!PageUptodate(page)) { + if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, + 0)) + return NULL; + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return NULL; + } + } + unlock_page(page); + + return page; +} + +static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +{ + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + struct btrfs_ordered_extent *ordered; + lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + ordered = btrfs_lookup_first_ordered_extent(inode, + off + len - 1); + if (!ordered && + !test_range_bit(&BTRFS_I(inode)->io_tree, off, + off + len - 1, EXTENT_DELALLOC, 0, NULL)) + break; + 
unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + if (ordered) + btrfs_put_ordered_extent(ordered); + btrfs_wait_ordered_range(inode, off, len); + } +} + +static void btrfs_double_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); + + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); +} + +static void btrfs_double_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + lock_extent_range(inode1, loff1, len); + if (inode1 != inode2) { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + lock_extent_range(inode2, loff2, len); + } +} + +static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, + u64 dst_loff, u64 len) +{ + int ret = 0; + struct page *src_page, *dst_page; + unsigned int cmp_len = PAGE_CACHE_SIZE; + void *addr, *dst_addr; + + while (len) { + if (len < PAGE_CACHE_SIZE) + cmp_len = len; + + src_page = extent_same_get_page(src, loff); + if (!src_page) + return -EINVAL; + dst_page = extent_same_get_page(dst, dst_loff); + if (!dst_page) { + page_cache_release(src_page); + return -EINVAL; + } + addr = kmap_atomic(src_page); + dst_addr = kmap_atomic(dst_page); + + flush_dcache_page(src_page); + flush_dcache_page(dst_page); + + if (memcmp(addr, dst_addr, cmp_len)) + ret = BTRFS_SAME_DATA_DIFFERS; + + kunmap_atomic(addr); + kunmap_atomic(dst_addr); + page_cache_release(src_page); + page_cache_release(dst_page); + + if (ret) + break; + + loff += cmp_len; + dst_loff += cmp_len; + len -= cmp_len; + } + + return ret; +} + +static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) +{ + u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; + + if (off + len > inode->i_size 
|| off + len < off) + return -EINVAL; + /* Check that we are block aligned - btrfs_clone() requires this */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) + return -EINVAL; + + return 0; +} + +static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, + struct inode *dst, u64 dst_loff) { - struct inode *inode = file_inode(file); - struct btrfs_root *root = BTRFS_I(inode)->root; - struct fd src_file; - struct inode *src; - struct btrfs_trans_handle *trans; - struct btrfs_path *path; - struct extent_buffer *leaf; - char *buf; - struct btrfs_key key; - u32 nritems; - int slot; int ret; - u64 len = olen; - u64 bs = root->fs_info->sb->s_blocksize; - int same_inode = 0; /* - * TODO: - * - split compressed inline extents. annoying: we need to - * decompress into destination's address_space (the file offset - * may change, so source mapping won't do), then recompress (or - * otherwise reinsert) a subrange. - * - allow ranges within the same file to be cloned (provided - * they don't overlap)? + * btrfs_clone() can't handle extents in the same file + * yet. Once that works, we can drop this check and replace it + * with a check for the same inode, but overlapping extents. 
*/ - - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + if (src == dst) return -EINVAL; - if (btrfs_root_readonly(root)) - return -EROFS; + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, len); + if (ret) + goto out_unlock; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { + ret = -EINVAL; + goto out_unlock; + } + + ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + if (ret == 0) + ret = btrfs_clone(src, dst, loff, len, len, dst_loff); + +out_unlock: + btrfs_double_unlock(src, loff, dst, dst_loff, len); + + return ret; +} + +#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) + +static long btrfs_ioctl_file_extent_same(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_same_args *args = argp; + struct btrfs_ioctl_same_args same; + struct btrfs_ioctl_same_extent_info info; + struct inode *src = file->f_dentry->d_inode; + struct file *dst_file = NULL; + struct inode *dst; + u64 off; + u64 len; + int i; + int ret; + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + bool is_admin = capable(CAP_SYS_ADMIN); + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; ret = mnt_want_write_file(file); if (ret) return ret; - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; + if (copy_from_user(&same, + (struct btrfs_ioctl_same_args __user *)argp, + sizeof(same))) { + ret = -EFAULT; + goto out; } - ret = -EXDEV; - if (src_file.file->f_path.mnt != file->f_path.mnt) - goto out_fput; + off = same.logical_offset; + len = same.length; - src = file_inode(src_file.file); + /* + * Limit the total length we will dedupe for each operation. 
+ * This is intended to bound the total time spent in this + * ioctl to something sane. + */ + if (len > BTRFS_MAX_DEDUPE_LEN) + len = BTRFS_MAX_DEDUPE_LEN; - ret = -EINVAL; - if (src == inode) - same_inode = 1; + if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + ret = -EINVAL; + goto out; + } - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; - /* don't make the dst file partly checksummed */ - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != - (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) - goto out_fput; + ret = -EACCES; + if (!S_ISREG(src->i_mode)) + goto out; - ret = -EISDIR; - if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) - goto out_fput; + ret = 0; + for (i = 0; i < same.dest_count; i++) { + if (copy_from_user(&info, &args->info[i], sizeof(info))) { + ret = -EFAULT; + goto out; + } - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; + info.bytes_deduped = 0; - ret = -ENOMEM; - buf = vmalloc(btrfs_level_size(root, 0)); - if (!buf) - goto out_fput; + dst_file = fget(info.fd); + if (!dst_file) { + info.status = -EBADF; + goto next; + } - path = btrfs_alloc_path(); - if (!path) { - vfree(buf); - goto out_fput; - } - path->reada = 2; + if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info.status = -EINVAL; + goto next; + } - if (!same_inode) { - if (inode < src) { - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + info.status = -EXDEV; + if (file->f_path.mnt != dst_file->f_path.mnt) + goto next; + + dst = dst_file->f_dentry->d_inode; + if (src->i_sb != dst->i_sb) + goto next; + + if (S_ISDIR(dst->i_mode)) { 
+ info.status = -EISDIR; + goto next; } - } else { - mutex_lock(&src->i_mutex); - } - /* determine range to clone */ - ret = -EINVAL; - if (off + len > src->i_size || off + len < off) - goto out_unlock; - if (len == 0) - olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) - len = ALIGN(src->i_size, bs) - off; + if (!S_ISREG(dst->i_mode)) { + info.status = -EACCES; + goto next; + } - /* verify the end result is block aligned */ - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || - !IS_ALIGNED(destoff, bs)) - goto out_unlock; + info.status = btrfs_extent_same(src, off, len, dst, + info.logical_offset); + if (info.status == 0) + info.bytes_deduped += len; - /* verify if ranges are overlapped within the same file */ - if (same_inode) { - if (destoff + len > off && destoff < off + len) - goto out_unlock; - } +next: + if (dst_file) + fput(dst_file); - if (destoff > inode->i_size) { - ret = btrfs_cont_expand(inode, inode->i_size, destoff); - if (ret) - goto out_unlock; + if (__put_user_unaligned(info.status, &args->info[i].status) || + __put_user_unaligned(info.bytes_deduped, + &args->info[i].bytes_deduped)) { + ret = -EFAULT; + goto out; + } } - /* truncate page cache pages from target inode range */ - truncate_inode_pages_range(&inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len) - 1); +out: + mnt_drop_write_file(file); + return ret; +} - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ - while (1) { - struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); - if (!ordered && - !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, - EXTENT_DELALLOC, 0, NULL)) - break; - unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - if (ordered) - btrfs_put_ordered_extent(ordered); - btrfs_wait_ordered_range(src, off, len); +/** + * 
btrfs_clone() - clone a range from inode file to another + * + * @src: Inode to clone from + * @inode: Inode to clone to + * @off: Offset within source to start clone from + * @olen: Original length, passed by user, of range to clone + * @olen_aligned: Block-aligned value of olen, extent_same uses + * identical values here + * @destoff: Offset within @inode to start clone + */ +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path = NULL; + struct extent_buffer *leaf; + struct btrfs_trans_handle *trans; + char *buf = NULL; + struct btrfs_key key; + u32 nritems; + int slot; + int ret; + u64 len = olen_aligned; + + ret = -ENOMEM; + buf = vmalloc(btrfs_level_size(root, 0)); + if (!buf) + return ret; + + path = btrfs_alloc_path(); + if (!path) { + vfree(buf); + return ret; } + path->reada = 2; /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; @@ -2858,15 +3094,132 @@ next: key.offset++; } ret = 0; + out: btrfs_release_path(path); + btrfs_free_path(path); + vfree(buf); + return ret; +} + +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct fd src_file; + struct inode *src; + int ret; + u64 len = olen; + u64 bs = root->fs_info->sb->s_blocksize; + int same_inode = 0; + + /* + * TODO: + * - split compressed inline extents. annoying: we need to + * decompress into destination's address_space (the file offset + * may change, so source mapping won't do), then recompress (or + * otherwise reinsert) a subrange. + * - allow ranges within the same file to be cloned (provided + * they don't overlap)? 
+ */ + + /* the destination must be opened for writing */ + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + return -EINVAL; + + if (btrfs_root_readonly(root)) + return -EROFS; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + ret = -EXDEV; + if (src_file.file->f_path.mnt != file->f_path.mnt) + goto out_fput; + + src = file_inode(src_file.file); + + ret = -EINVAL; + if (src == inode) + same_inode = 1; + + /* the src must be open for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) + goto out_fput; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (!same_inode) { + if (inode < src) { + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + } + } else { + mutex_lock(&src->i_mutex); + } + + /* determine range to clone */ + ret = -EINVAL; + if (off + len > src->i_size || off + len < off) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - off; + /* if we extend to eof, continue to block boundary */ + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) + goto out_unlock; + + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (destoff + len > off && destoff < off + len) + goto out_unlock; + } + + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) 
+ goto out_unlock; + } + + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len) - 1); + + lock_extent_range(src, off, len); + + ret = btrfs_clone(src, inode, off, olen, len, destoff); + unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); out_unlock: mutex_unlock(&src->i_mutex); if (!same_inode) mutex_unlock(&inode->i_mutex); - vfree(buf); - btrfs_free_path(path); out_fput: fdput(src_file); out_drop_write: @@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINPROGRESS; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_start(root, p); atomic_set( @@ -3560,8 +3915,7 @@ again: } else { /* this is (1) */ mutex_unlock(&fs_info->balance_mutex); - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINVAL; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out; } @@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_trans_handle *trans; struct timespec ct = CURRENT_TIME; int ret = 0; + int received_uuid_changed; ret = mnt_want_write_file(file); if (ret < 0) @@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - trans = btrfs_start_transaction(root, 1); + /* + * 1 - root item + * 2 - uuid items (received uuid + subvol uuid) + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; @@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; + 
received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + if (received_uuid_changed && + !btrfs_is_empty_uuid(root_item->received_uuid)) + btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); - root_item->stime.sec = cpu_to_le64(sa->stime.sec); - root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); - root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); - root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); + btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); + btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); + btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); + btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans, root); - trans = NULL; goto out; - } else { - ret = btrfs_commit_transaction(trans, root); - if (ret < 0) + } + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); + if (ret < 0 && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); goto out; + } + } + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; } ret = copy_to_user(arg, sa, sizeof(*sa)); @@ -4041,18 +4418,22 @@ out: static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) { struct btrfs_root *root = BTRFS_I(file_inode(file))->root; - const char *label = root->fs_info->super_copy->label; - size_t len = strnlen(label, BTRFS_LABEL_SIZE); + size_t len; int ret; + char label[BTRFS_LABEL_SIZE]; + + 
spin_lock(&root->fs_info->super_lock); + memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); + spin_unlock(&root->fs_info->super_lock); + + len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { pr_warn("btrfs: label is too long, return the first %zu bytes\n", --len); } - mutex_lock(&root->fs_info->volume_mutex); ret = copy_to_user(arg, label, len); - mutex_unlock(&root->fs_info->volume_mutex); return ret ? -EFAULT : 0; } @@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) if (ret) return ret; - mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_unlock; } + spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); + spin_unlock(&root->fs_info->super_lock); ret = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->volume_mutex); mnt_drop_write_file(file); return ret; } @@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_fslabel(file, argp); case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); + case BTRFS_IOC_FILE_EXTENT_SAME: + return btrfs_ioctl_file_extent_same(file, argp); } return -ENOTTY; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index f93151a98886..b6a6f07c5ce2 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws, } /* we're making it bigger, give up */ - if (tot_in > 8192 && tot_in < tot_out) + if (tot_in > 8192 && tot_in < tot_out) { + ret = -1; goto out; + } /* we're all done */ if (tot_in >= len) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 81369827e514..966b413a33b8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno, { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); btrfs_panic(fs_info, errno, 
"Inconsistency in ordered tree at offset " - "%llu\n", (unsigned long long)offset); + "%llu\n", offset); } /* @@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->bytes_left = len; entry->inode = igrab(inode); entry->compress_type = compress_type; + entry->truncated_len = (u64)-1; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, *file_offset = dec_end; if (dec_start > dec_end) { printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", - (unsigned long long)dec_start, - (unsigned long long)dec_end); + dec_start, dec_end); } to_dec = dec_end - dec_start; if (to_dec > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)to_dec); + entry->bytes_left, to_dec); } entry->bytes_left -= to_dec; if (!uptodate) @@ -403,8 +402,7 @@ have_entry: if (io_size > entry->bytes_left) { printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", - (unsigned long long)entry->bytes_left, - (unsigned long long)io_size); + entry->bytes_left, io_size); } entry->bytes_left -= io_size; if (!uptodate) @@ -671,7 +669,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); - mutex_lock(&root->fs_info->ordered_operations_mutex); + mutex_lock(&root->fs_info->ordered_extent_flush_mutex); spin_lock(&root->fs_info->ordered_root_lock); list_splice_init(&cur_trans->ordered_operations, &splice); while (!list_empty(&splice)) { @@ -718,7 +716,7 @@ out: list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); } - mutex_unlock(&root->fs_info->ordered_operations_mutex); + mutex_unlock(&root->fs_info->ordered_extent_flush_mutex); return ret; } @@ -923,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct 
btrfs_ordered_extent *test; int ret = 1; - if (ordered) + spin_lock_irq(&tree->lock); + if (ordered) { offset = entry_end(ordered); - else + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) + offset = min(offset, + ordered->file_offset + + ordered->truncated_len); + } else { offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); - - spin_lock_irq(&tree->lock); + } disk_i_size = BTRFS_I(inode)->disk_i_size; /* truncate file */ diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 68844d59ee6f..d9a5aa097b4f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -69,6 +69,7 @@ struct btrfs_ordered_sum { * the isize. */ #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered ordered extent */ +#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ struct btrfs_ordered_extent { /* logical offset in the file */ @@ -96,6 +97,12 @@ struct btrfs_ordered_extent { */ u64 outstanding_isize; + /* + * If we get truncated we need to adjust the file extent we enter for + * this ordered extent so that we do not expose stale data. 
+ */ + u64 truncated_len; + /* flags (described above) */ unsigned long flags; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index dc0024f17c1f..0088bedc8631 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk) int i; printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " "num_stripes %d\n", - (unsigned long long)btrfs_chunk_length(eb, chunk), - (unsigned long long)btrfs_chunk_owner(eb, chunk), - (unsigned long long)btrfs_chunk_type(eb, chunk), - num_stripes); + btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk), + btrfs_chunk_type(eb, chunk), num_stripes); for (i = 0 ; i < num_stripes ; i++) { printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, - (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), - (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); + btrfs_stripe_devid_nr(eb, chunk, i), + btrfs_stripe_offset_nr(eb, chunk, i)); } } static void print_dev_item(struct extent_buffer *eb, @@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb, { printk(KERN_INFO "\t\tdev item devid %llu " "total_bytes %llu bytes used %llu\n", - (unsigned long long)btrfs_device_id(eb, dev_item), - (unsigned long long)btrfs_device_total_bytes(eb, dev_item), - (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); + btrfs_device_id(eb, dev_item), + btrfs_device_total_bytes(eb, dev_item), + btrfs_device_bytes_used(eb, dev_item)); } static void print_extent_data_ref(struct extent_buffer *eb, struct btrfs_extent_data_ref *ref) { printk(KERN_INFO "\t\textent data backref root %llu " "objectid %llu offset %llu count %u\n", - (unsigned long long)btrfs_extent_data_ref_root(eb, ref), - (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), - (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), + btrfs_extent_data_ref_root(eb, ref), + btrfs_extent_data_ref_objectid(eb, ref), + 
btrfs_extent_data_ref_offset(eb, ref), btrfs_extent_data_ref_count(eb, ref)); } @@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot) flags = btrfs_extent_flags(eb, ei); printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", - (unsigned long long)btrfs_extent_refs(eb, ei), - (unsigned long long)btrfs_extent_generation(eb, ei), - (unsigned long long)flags); + btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), + flags); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)(ei + 1); btrfs_tree_block_key(eb, info, &key); - printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " + printk(KERN_INFO "\t\ttree block key (%llu %u %llu) " "level %d\n", - (unsigned long long)btrfs_disk_key_objectid(&key), - key.type, - (unsigned long long)btrfs_disk_key_offset(&key), + btrfs_disk_key_objectid(&key), key.type, + btrfs_disk_key_offset(&key), btrfs_tree_block_level(eb, info)); iref = (struct btrfs_extent_inline_ref *)(info + 1); } else { @@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot) switch (type) { case BTRFS_TREE_BLOCK_REF_KEY: printk(KERN_INFO "\t\ttree block backref " - "root %llu\n", (unsigned long long)offset); + "root %llu\n", offset); break; case BTRFS_SHARED_BLOCK_REF_KEY: printk(KERN_INFO "\t\tshared block backref " - "parent %llu\n", (unsigned long long)offset); + "parent %llu\n", offset); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot) sref = (struct btrfs_shared_data_ref *)(iref + 1); printk(KERN_INFO "\t\tshared data backref " "parent %llu count %u\n", - (unsigned long long)offset, - btrfs_shared_data_ref_count(eb, sref)); + offset, btrfs_shared_data_ref_count(eb, sref)); break; default: BUG(); @@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) 
ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); printk("\t\textent back ref root %llu gen %llu " "owner %llu num_refs %lu\n", - (unsigned long long)btrfs_ref_root_v0(eb, ref0), - (unsigned long long)btrfs_ref_generation_v0(eb, ref0), - (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), + btrfs_ref_root_v0(eb, ref0), + btrfs_ref_generation_v0(eb, ref0), + btrfs_ref_objectid_v0(eb, ref0), (unsigned long)btrfs_ref_count_v0(eb, ref0)); } #endif +static void print_uuid_item(struct extent_buffer *l, unsigned long offset, + u32 item_size) +{ + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + return; + } + while (item_size) { + __le64 subvol_id; + + read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id)); + printk(KERN_INFO "\t\tsubvol_id %llu\n", + (unsigned long long)le64_to_cpu(subvol_id)); + item_size -= sizeof(u64); + offset += sizeof(u64); + } +} + void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; @@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) nr = btrfs_header_nritems(l); btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", - (unsigned long long)btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(root, l)); + btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); - printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " + printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " "itemsize %d\n", - i, - (unsigned long long)key.objectid, type, - (unsigned long long)key.offset, + i, key.objectid, type, key.offset, btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk(KERN_INFO "\t\tinode generation %llu size %llu " "mode %o\n", - (unsigned long 
long) btrfs_inode_generation(l, ii), - (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_size(l, ii), btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); printk(KERN_INFO "\t\tdir oid %llu type %u\n", - (unsigned long long)found_key.objectid, + found_key.objectid, btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", - (unsigned long long) btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; @@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) } printk(KERN_INFO "\t\textent data disk bytenr %llu " "nr %llu\n", - (unsigned long long) btrfs_file_extent_disk_bytenr(l, fi), - (unsigned long long) btrfs_file_extent_disk_num_bytes(l, fi)); printk(KERN_INFO "\t\textent data offset %llu " "nr %llu ram %llu\n", - (unsigned long long) btrfs_file_extent_offset(l, fi), - (unsigned long long) btrfs_file_extent_num_bytes(l, fi), - (unsigned long long) btrfs_file_extent_ram_bytes(l, fi)); break; case BTRFS_EXTENT_REF_V0_KEY: @@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk(KERN_INFO "\t\tblock group used %llu\n", - (unsigned long long) btrfs_disk_block_group_used(l, bi)); break; case BTRFS_CHUNK_ITEM_KEY: @@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" "\t\tchunk objectid %llu chunk offset %llu " "length %llu\n", - (unsigned long long) btrfs_dev_extent_chunk_tree(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_objectid(l, dev_extent), - (unsigned long long) btrfs_dev_extent_chunk_offset(l, dev_extent), - (unsigned long long) btrfs_dev_extent_length(l, dev_extent)); break; case 
BTRFS_DEV_STATS_KEY: @@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DEV_REPLACE_KEY: printk(KERN_INFO "\t\tdev replace\n"); break; + case BTRFS_UUID_KEY_SUBVOL: + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + print_uuid_item(l, btrfs_item_ptr_offset(l, i), + btrfs_item_size_nr(l, i)); + break; }; } } @@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) return; } btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", - (unsigned long long)btrfs_header_bytenr(c), - level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); + btrfs_header_bytenr(c), level, nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", - i, - (unsigned long long)key.objectid, - key.type, - (unsigned long long)key.offset, - (unsigned long long)btrfs_node_blockptr(c, i)); + i, key.objectid, key.type, key.offset, + btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1280eff8af56..4e6ef490619e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, return qgroup; } -/* must be called with qgroup_lock held */ -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) { - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); struct btrfs_qgroup_list *list; - if (!qgroup) - return -ENOENT; - - rb_erase(&qgroup->node, &fs_info->qgroup_tree); list_del(&qgroup->dirty); - while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, struct btrfs_qgroup_list, next_group); @@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) kfree(list); } kfree(qgroup); 
+} +/* must be called with qgroup_lock held */ +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); + + if (!qgroup) + return -ENOENT; + + rb_erase(&qgroup->node, &fs_info->qgroup_tree); + __del_qgroup_rb(qgroup); return 0; } @@ -394,8 +398,7 @@ next1: if (ret == -ENOENT) { printk(KERN_WARNING "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", - (unsigned long long)found_key.objectid, - (unsigned long long)found_key.offset); + found_key.objectid, found_key.offset); ret = 0; /* ignore the error */ } if (ret) @@ -428,39 +431,28 @@ out: } /* - * This is only called from close_ctree() or open_ctree(), both in single- - * treaded paths. Clean up the in-memory structures. No locking needed. + * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), + * first two are in single-threaded paths.And for the third one, we have set + * quota_root to be null with qgroup_lock held before, so it is safe to clean + * up the in-memory structures without qgroup_lock held. */ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; - struct btrfs_qgroup_list *list; while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, - next_group); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, - next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); + __del_qgroup_rb(qgroup); } + /* + * we call btrfs_free_qgroup_config() when umounting + * filesystem and disabling quota, so we set qgroup_ulit + * to be null here to avoid double free. 
+ */ ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; } static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, @@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, fs_info->pending_quota_state = 0; quota_root = fs_info->quota_root; fs_info->quota_root = NULL; - btrfs_free_qgroup_config(fs_info); spin_unlock(&fs_info->qgroup_lock); - if (!quota_root) { - ret = -EINVAL; - goto out; - } + btrfs_free_qgroup_config(fs_info); ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) @@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; printk(KERN_INFO "unable to update quota limit for %llu\n", - (unsigned long long)qgroupid); + qgroupid); } spin_lock(&fs_info->qgroup_lock); @@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, path, 1, 0); pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", - (unsigned long long)fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.objectid, fs_info->qgroup_rescan_progress.type, - (unsigned long long)fs_info->qgroup_rescan_progress.offset, - ret); + fs_info->qgroup_rescan_progress.offset, ret); if (ret) { /* diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0525e1389f5b..d0ecfbd9cc9f 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio) int ret; ret = alloc_rbio_parity_pages(rbio); - if (ret) + if (ret) { + __free_raid_bio(rbio); return ret; + } ret = lock_stripe_add(rbio); if (ret == 0) @@ -1687,11 +1689,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct blk_plug_cb *cb; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { - kfree(raid_map); - kfree(bbio); + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = 
bio->bi_size; @@ -2041,9 +2040,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) { + if (IS_ERR(rbio)) return PTR_ERR(rbio); - } rbio->read_rebuild = 1; bio_list_add(&rbio->bio_list, bio); @@ -2052,6 +2050,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); + kfree(raid_map); + kfree(bbio); kfree(rbio); return -EIO; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 12096496cc99..aacc2121e87c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) if (bnode->root) fs_info = bnode->root->fs_info; btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu\n", (unsigned long long)bytenr); + "found at offset %llu\n", bytenr); } /* @@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + item_size <= sizeof(*ei)) { + WARN_ON(item_size < sizeof(*ei)); + return 1; + } if (key.type == BTRFS_EXTENT_ITEM_KEY) { bi = (struct btrfs_tree_block_info *)(ei + 1); @@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -914,6 +920,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -957,14 +964,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. 
this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else @@ -2314,8 +2319,13 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - if (ret) + if (ret) { + __update_reloc_root(reloc_root, 1); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + kfree(reloc_root); goto out; + } } else { list_del_init(&reloc_root->root_list); } @@ -2344,9 +2354,6 @@ again: if (IS_ERR(root)) continue; - if (btrfs_root_refs(&root->root_item) == 0) - continue; - trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -3628,7 +3635,7 @@ int add_data_references(struct reloc_control *rc, unsigned long ptr; unsigned long end; u32 blocksize = btrfs_level_size(rc->extent_root, 0); - int ret; + int ret = 0; int err = 0; eb = path->nodes[0]; @@ -3655,6 +3662,10 @@ int add_data_references(struct reloc_control *rc, } else { BUG(); } + if (ret) { + err = ret; + goto out; + } ptr += btrfs_extent_inline_ref_size(key.type); } WARN_ON(ptr > end); @@ -3700,6 +3711,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } +out: btrfs_release_path(path); if (err) free_block_list(blocks); @@ -4219,8 +4231,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) } printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", - (unsigned long long)rc->block_group->key.objectid, - (unsigned long long)rc->block_group->flags); + rc->block_group->key.objectid, rc->block_group->flags); ret = btrfs_start_all_delalloc_inodes(fs_info, 0); if (ret < 0) { @@ -4242,7 +4253,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) break; 
printk(KERN_INFO "btrfs: found %llu extents\n", - (unsigned long long)rc->extents_found); + rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ffb1036ef10d..0b1f4ef8db98 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -29,8 +29,8 @@ * generation numbers as then we know the root was once mounted with an older * kernel that was not aware of the root item structure change. */ -void btrfs_read_root_item(struct extent_buffer *eb, int slot, - struct btrfs_root_item *item) +static void btrfs_read_root_item(struct extent_buffer *eb, int slot, + struct btrfs_root_item *item) { uuid_le uuid; int len; @@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); printk(KERN_CRIT "unable to update root key %llu %u %llu\n", - (unsigned long long)key->objectid, key->type, - (unsigned long long)key->offset); + key->objectid, key->type, key->offset); BUG_ON(1); } @@ -490,13 +489,13 @@ again: */ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) { - u64 inode_flags = le64_to_cpu(root_item->inode.flags); + u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode); if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; - root_item->inode.flags = cpu_to_le64(inode_flags); - root_item->flags = 0; - root_item->byte_limit = 0; + btrfs_set_stack_inode_flags(&root_item->inode, inode_flags); + btrfs_set_root_flags(root_item, 0); + btrfs_set_root_limit(root_item, 0); } } @@ -507,8 +506,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct timespec ct = CURRENT_TIME; spin_lock(&root->root_item_lock); - item->ctransid = cpu_to_le64(trans->transid); - item->ctime.sec = cpu_to_le64(ct.tv_sec); - item->ctime.nsec = cpu_to_le32(ct.tv_nsec); + 
btrfs_set_root_ctransid(item, trans->transid); + btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec); + btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec); spin_unlock(&root->root_item_lock); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 64a157becbe5..0afcd452fcb3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -754,8 +754,7 @@ out: num_uncorrectable_read_errors); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, - rcu_str_deref(fixup->dev->name)); + fixup->logical, rcu_str_deref(fixup->dev->name)); } btrfs_free_path(path); @@ -1154,8 +1153,7 @@ corrected_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } } else { did_not_correct_error: @@ -1164,8 +1162,7 @@ did_not_correct_error: spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, - rcu_str_deref(dev->name)); + logical, rcu_str_deref(dev->name)); } out: @@ -1345,12 +1342,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, mapped_buffer = kmap_atomic(sblock->pagev[0]->page); h = (struct btrfs_header *)mapped_buffer; - if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, BTRFS_UUID_SIZE)) { sblock->header_error = 1; - } else if (generation != le64_to_cpu(h->generation)) { + } else if (generation != btrfs_stack_header_generation(h)) { sblock->header_error = 1; sblock->generation_error = 1; } @@ -1720,10 +1717,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) * b) the page is already kmapped */ 
- if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) + if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) ++fail; - if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) + if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) ++fail; if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -1786,10 +1783,10 @@ static int scrub_checksum_super(struct scrub_block *sblock) s = (struct btrfs_super_block *)mapped_buffer; memcpy(on_disk_csum, s->csum, sctx->csum_size); - if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) + if (sblock->pagev[0]->logical != btrfs_super_bytenr(s)) ++fail_cor; - if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) + if (sblock->pagev[0]->generation != btrfs_super_generation(s)) ++fail_gen; if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) @@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, printk(KERN_ERR "btrfs scrub: tree block %llu spanning " "stripes, ignored. logical=%llu\n", - (unsigned long long)key.objectid, - (unsigned long long)logical); + key.objectid, logical); goto next; } @@ -2863,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { /* not supported for data w/o checksums */ printk(KERN_ERR - "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", - fs_info->chunk_root->sectorsize, - (unsigned long long)PAGE_SIZE); + "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n", + fs_info->chunk_root->sectorsize, PAGE_SIZE); return -EINVAL; } @@ -3175,11 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) copy_nocow_pages_for_inode, nocow_ctx); if (ret != 0 && ret != -ENOENT) { - pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", - (unsigned long long)logical, - (unsigned long long)physical_for_dev_replace, - (unsigned long long)len, - (unsigned 
long long)mirror_num, ret); + pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", + logical, physical_for_dev_replace, len, mirror_num, + ret); not_written = 1; goto out; } @@ -3224,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) return PTR_ERR(local_root); } - if (btrfs_root_refs(&local_root->root_item) == 0) { - srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); - return -ENOENT; - } - key.type = BTRFS_INODE_ITEM_KEY; key.objectid = inum; key.offset = 0; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2e14fd89a8b4..e46e0ed74925 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -26,6 +26,7 @@ #include <linux/radix-tree.h> #include <linux/crc32c.h> #include <linux/vmalloc.h> +#include <linux/string.h> #include "send.h" #include "backref.h" @@ -54,8 +55,8 @@ struct fs_path { char *buf; int buf_len; - int reversed:1; - int virtual_mem:1; + unsigned int reversed:1; + unsigned int virtual_mem:1; char inline_buf[]; }; char pad[PAGE_SIZE]; @@ -1668,6 +1669,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, u64 *who_ino, u64 *who_gen) { int ret = 0; + u64 gen; u64 other_inode = 0; u8 other_type = 0; @@ -1678,6 +1680,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, if (ret <= 0) goto out; + /* + * If we have a parent root we need to verify that the parent dir was + * not delted and then re-created, if it was then we have no overwrite + * and we can just unlink this entry. 
+ */ + if (sctx->parent_root) { + ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + if (gen != dir_gen) + goto out; + } + ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, &other_inode, &other_type); if (ret < 0 && ret != -ENOENT) @@ -2519,7 +2539,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(eb, di, &di_key); - if (di_key.objectid < sctx->send_progress) { + if (di_key.type != BTRFS_ROOT_ITEM_KEY && + di_key.objectid < sctx->send_progress) { ret = 1; goto out; } @@ -2581,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir, u64 dir_gen, struct fs_path *path) { struct recorded_ref *ref; - char *tmp; ref = kmalloc(sizeof(*ref), GFP_NOFS); if (!ref) @@ -2591,25 +2611,35 @@ static int record_ref(struct list_head *head, u64 dir, ref->dir_gen = dir_gen; ref->full_path = path; - tmp = strrchr(ref->full_path->start, '/'); - if (!tmp) { - ref->name_len = ref->full_path->end - ref->full_path->start; - ref->name = ref->full_path->start; + ref->name = (char *)kbasename(ref->full_path->start); + ref->name_len = ref->full_path->end - ref->name; + ref->dir_path = ref->full_path->start; + if (ref->name == ref->full_path->start) ref->dir_path_len = 0; - ref->dir_path = ref->full_path->start; - } else { - tmp++; - ref->name_len = ref->full_path->end - tmp; - ref->name = tmp; - ref->dir_path = ref->full_path->start; + else ref->dir_path_len = ref->full_path->end - ref->full_path->start - 1 - ref->name_len; - } list_add_tail(&ref->list, head); return 0; } +static int dup_ref(struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *new; + + new = kmalloc(sizeof(*ref), GFP_NOFS); + if (!new) + return -ENOMEM; + + new->dir = ref->dir; + new->dir_gen = ref->dir_gen; + new->full_path = NULL; + INIT_LIST_HEAD(&new->list); + 
list_add_tail(&new->list, list); + return 0; +} + static void __free_recorded_refs(struct list_head *head) { struct recorded_ref *cur; @@ -2724,9 +2754,7 @@ static int process_recorded_refs(struct send_ctx *sctx) int ret = 0; struct recorded_ref *cur; struct recorded_ref *cur2; - struct ulist *check_dirs = NULL; - struct ulist_iterator uit; - struct ulist_node *un; + struct list_head check_dirs; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; @@ -2740,6 +2768,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * which is always '..' */ BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); + INIT_LIST_HEAD(&check_dirs); valid_path = fs_path_alloc(); if (!valid_path) { @@ -2747,12 +2776,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } - check_dirs = ulist_alloc(GFP_NOFS); - if (!check_dirs) { - ret = -ENOMEM; - goto out; - } - /* * First, check if the first ref of the current inode was overwritten * before. If yes, we know that the current inode was already orphanized @@ -2889,8 +2912,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); goto out; } } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2918,8 +2940,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } @@ -2930,8 +2951,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); */ cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, list); - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -2951,12 +2971,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); if 
(ret < 0) goto out; } - ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, - GFP_NOFS); + ret = dup_ref(cur, &check_dirs); if (ret < 0) goto out; } - /* * If the inode is still orphan, unlink the orphan. This may * happen when a previous inode did overwrite the first ref @@ -2978,33 +2996,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * deletion and if it's finally possible to perform the rmdir now. * We also update the inode stats of the parent dirs here. */ - ULIST_ITER_INIT(&uit); - while ((un = ulist_next(check_dirs, &uit))) { + list_for_each_entry(cur, &check_dirs, list) { /* * In case we had refs into dirs that were not processed yet, * we don't need to do the utime and rmdir logic for these dirs. * The dir will be processed later. */ - if (un->val > sctx->cur_ino) + if (cur->dir > sctx->cur_ino) continue; - ret = get_cur_inode_state(sctx, un->val, un->aux); + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; if (ret == inode_state_did_create || ret == inode_state_no_change) { /* TODO delayed utimes */ - ret = send_utimes(sctx, un->val, un->aux); + ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; } else if (ret == inode_state_did_delete) { - ret = can_rmdir(sctx, un->val, sctx->cur_ino); + ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); if (ret < 0) goto out; if (ret) { - ret = get_cur_path(sctx, un->val, un->aux, - valid_path); + ret = get_cur_path(sctx, cur->dir, + cur->dir_gen, valid_path); if (ret < 0) goto out; ret = send_rmdir(sctx, valid_path); @@ -3017,8 +3034,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = 0; out: + __free_recorded_refs(&check_dirs); free_recorded_refs(sctx); - ulist_free(check_dirs); fs_path_free(valid_path); return ret; } @@ -3119,6 +3136,8 @@ out: struct find_ref_ctx { u64 dir; + u64 dir_gen; + struct btrfs_root *root; struct fs_path *name; int found_idx; }; @@ -3128,9 +3147,21 @@ static int __find_iref(int num, u64 
dir, int index, void *ctx_) { struct find_ref_ctx *ctx = ctx_; + u64 dir_gen; + int ret; if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { + /* + * To avoid doing extra lookups we'll only do this if everything + * else matches. + */ + ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + if (dir_gen != ctx->dir_gen) + return 0; ctx->found_idx = num; return 1; } @@ -3140,14 +3171,16 @@ static int __find_iref(int num, u64 dir, int index, static int find_iref(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, - u64 dir, struct fs_path *name) + u64 dir, u64 dir_gen, struct fs_path *name) { int ret; struct find_ref_ctx ctx; ctx.dir = dir; ctx.name = name; + ctx.dir_gen = dir_gen; ctx.found_idx = -1; + ctx.root = root; ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); if (ret < 0) @@ -3163,11 +3196,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->parent_root, sctx->right_path, - sctx->cmp_key, dir, name); + sctx->cmp_key, dir, dir_gen, name); if (ret == -ENOENT) ret = __record_new_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -3180,11 +3219,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { + u64 dir_gen; int ret; struct send_ctx *sctx = ctx; + ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, - dir, name); + dir, dir_gen, name); if (ret == -ENOENT) ret = __record_deleted_ref(num, dir, index, name, sctx); else if (ret > 0) @@ -3869,7 +3914,8 @@ static int 
is_extent_unchanged(struct send_ctx *sctx, btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || found_key.type != key.type) { - ret = 0; + /* If we're a hole then just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -3895,7 +3941,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, * This may only happen on the first iteration. */ if (found_key.offset + right_len <= ekey->offset) { - ret = 0; + /* If we're a hole just pretend nothing changed */ + ret = (left_disknr) ? 0 : 1; goto out; } @@ -3960,8 +4007,8 @@ static int process_extent(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { - int ret = 0; struct clone_root *found_clone = NULL; + int ret = 0; if (S_ISLNK(sctx->cur_inode_mode)) return 0; @@ -3974,6 +4021,32 @@ static int process_extent(struct send_ctx *sctx, ret = 0; goto out; } + } else { + struct btrfs_file_extent_item *ei; + u8 type; + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + type = btrfs_file_extent_type(path->nodes[0], ei); + if (type == BTRFS_FILE_EXTENT_PREALLOC || + type == BTRFS_FILE_EXTENT_REG) { + /* + * The send spec does not have a prealloc command yet, + * so just leave a hole for prealloc'ed extents until + * we have enough commands queued up to justify rev'ing + * the send spec. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC) { + ret = 0; + goto out; + } + + /* Have a hole, just skip it. 
*/ + if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { + ret = 0; + goto out; + } + } } ret = find_extent_clone(sctx, path, key->objectid, key->offset, @@ -4361,6 +4434,64 @@ static int changed_extent(struct send_ctx *sctx, return ret; } +static int dir_changed(struct send_ctx *sctx, u64 dir) +{ + u64 orig_gen, new_gen; + int ret; + + ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, + NULL, NULL); + if (ret) + return ret; + + ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, + NULL, NULL, NULL); + if (ret) + return ret; + + return (orig_gen != new_gen) ? 1 : 0; +} + +static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, + struct btrfs_key *key) +{ + struct btrfs_inode_extref *extref; + struct extent_buffer *leaf; + u64 dirid = 0, last_dirid = 0; + unsigned long ptr; + u32 item_size; + u32 cur_offset = 0; + int ref_name_len; + int ret = 0; + + /* Easy case, just check this one dirid */ + if (key->type == BTRFS_INODE_REF_KEY) { + dirid = key->offset; + + ret = dir_changed(sctx, dirid); + goto out; + } + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + while (cur_offset < item_size) { + extref = (struct btrfs_inode_extref *)(ptr + + cur_offset); + dirid = btrfs_inode_extref_parent(leaf, extref); + ref_name_len = btrfs_inode_extref_name_len(leaf, extref); + cur_offset += ref_name_len + sizeof(*extref); + if (dirid == last_dirid) + continue; + ret = dir_changed(sctx, dirid); + if (ret) + break; + last_dirid = dirid; + } +out: + return ret; +} + /* * Updates compare related fields in sctx and simply forwards to the actual * changed_xxx functions. 
@@ -4376,6 +4507,19 @@ static int changed_cb(struct btrfs_root *left_root, int ret = 0; struct send_ctx *sctx = ctx; + if (result == BTRFS_COMPARE_TREE_SAME) { + if (key->type != BTRFS_INODE_REF_KEY && + key->type != BTRFS_INODE_EXTREF_KEY) + return 0; + ret = compare_refs(sctx, left_path, key); + if (!ret) + return 0; + if (ret < 0) + return ret; + result = BTRFS_COMPARE_TREE_CHANGED; + ret = 0; + } + sctx->left_path = left_path; sctx->right_path = right_path; sctx->cmp_key = key; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8eb6191d86da..3aab10ce63e8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,8 @@ #include "rcu-string.h" #include "dev-replace.h" #include "free-space-cache.h" +#include "backref.h" +#include "tests/btrfs-tests.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> @@ -320,14 +322,15 @@ enum { Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_skip_balance, Opt_check_integrity, Opt_check_integrity_including_extent_data, - Opt_check_integrity_print_mask, Opt_fatal_errors, + Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, + Opt_commit_interval, Opt_err, }; static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, - {Opt_subvolid, "subvolid=%d"}, + {Opt_subvolid, "subvolid=%s"}, {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -360,7 +363,9 @@ static match_table_t tokens = { {Opt_check_integrity, "check_int"}, {Opt_check_integrity_including_extent_data, "check_int_data"}, {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, + {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, {Opt_fatal_errors, "fatal_errors=%s"}, + {Opt_commit_interval, "commit=%d"}, {Opt_err, NULL}, }; @@ -496,10 +501,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NOBARRIER); break; case Opt_thread_pool: - intarg = 0; - match_int(&args[0], 
&intarg); - if (intarg) + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg > 0) { info->thread_pool_size = intarg; + } else { + ret = -EINVAL; + goto out; + } break; case Opt_max_inline: num = match_strdup(&args[0]); @@ -513,7 +523,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) root->sectorsize); } printk(KERN_INFO "btrfs: max_inline at %llu\n", - (unsigned long long)info->max_inline); + info->max_inline); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_alloc_start: @@ -525,7 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) kfree(num); printk(KERN_INFO "btrfs: allocations start at %llu\n", - (unsigned long long)info->alloc_start); + info->alloc_start); + } else { + ret = -ENOMEM; + goto out; } break; case Opt_noacl: @@ -540,12 +556,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); break; case Opt_ratio: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 0) { info->metadata_ratio = intarg; printk(KERN_INFO "btrfs: metadata ratio %d\n", info->metadata_ratio); + } else { + ret = -EINVAL; + goto out; } break; case Opt_discard: @@ -554,6 +574,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_rescan_uuid_tree: + btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); + break; case Opt_no_space_cache: printk(KERN_INFO "btrfs: disabling disk space caching\n"); btrfs_clear_opt(info->mount_opt, SPACE_CACHE); @@ -596,13 +619,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); break; case Opt_check_integrity_print_mask: - intarg = 0; - match_int(&args[0], &intarg); - if (intarg) { + ret = match_int(&args[0], &intarg); + if (ret) { + goto out; + } else if (intarg >= 
0) { info->check_integrity_print_mask = intarg; printk(KERN_INFO "btrfs:" " check_integrity_print_mask 0x%x\n", info->check_integrity_print_mask); + } else { + ret = -EINVAL; + goto out; } break; #else @@ -626,6 +653,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) goto out; } break; + case Opt_commit_interval: + intarg = 0; + ret = match_int(&args[0], &intarg); + if (ret < 0) { + printk(KERN_ERR + "btrfs: invalid commit interval\n"); + ret = -EINVAL; + goto out; + } + if (intarg > 0) { + if (intarg > 300) { + printk(KERN_WARNING + "btrfs: excessive commit interval %d\n", + intarg); + } + info->commit_interval = intarg; + } else { + printk(KERN_INFO + "btrfs: using default commit interval %ds\n", + BTRFS_DEFAULT_COMMIT_INTERVAL); + info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; + } + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -654,8 +704,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + char *num = NULL; int error = 0; - int intarg; if (!options) return 0; @@ -679,17 +729,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvol: kfree(*subvol_name); *subvol_name = match_strdup(&args[0]); + if (!*subvol_name) { + error = -ENOMEM; + goto out; + } break; case Opt_subvolid: - intarg = 0; - error = match_int(&args[0], &intarg); - if (!error) { + num = match_strdup(&args[0]); + if (num) { + *subvol_objectid = memparse(num, NULL); + kfree(num); /* we want the original fs_tree */ - if (!intarg) + if (!*subvol_objectid) *subvol_objectid = BTRFS_FS_TREE_OBJECTID; - else - *subvol_objectid = intarg; + } else { + error = -EINVAL; + goto out; } break; case Opt_subvolrootid: @@ -892,11 +948,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != 8192 * 
1024) - seq_printf(seq, ",max_inline=%llu", - (unsigned long long)info->max_inline); + seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->alloc_start != 0) - seq_printf(seq, ",alloc_start=%llu", - (unsigned long long)info->alloc_start); + seq_printf(seq, ",alloc_start=%llu", info->alloc_start); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); @@ -928,6 +982,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",space_cache"); else seq_puts(seq, ",nospace_cache"); + if (btrfs_test_opt(root, RESCAN_UUID_TREE)) + seq_puts(seq, ",rescan_uuid_tree"); if (btrfs_test_opt(root, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) @@ -940,8 +996,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",inode_cache"); if (btrfs_test_opt(root, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); + if (btrfs_test_opt(root, RECOVERY)) + seq_puts(seq, ",recovery"); +#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY + if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA)) + seq_puts(seq, ",check_int_data"); + else if (btrfs_test_opt(root, CHECK_INTEGRITY)) + seq_puts(seq, ",check_int"); + if (info->check_integrity_print_mask) + seq_printf(seq, ",check_int_print_mask=%d", + info->check_integrity_print_mask); +#endif + if (info->metadata_ratio) + seq_printf(seq, ",metadata_ratio=%d", + info->metadata_ratio); if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); + if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) + seq_printf(seq, ",commit=%d", info->commit_interval); return 0; } @@ -1696,6 +1768,11 @@ static void btrfs_print_info(void) "\n"); } +static int btrfs_run_sanity_tests(void) +{ + return btrfs_test_free_space_cache(); +} + static int __init init_btrfs_fs(void) { int err; @@ -1734,23 +1811,32 @@ static int __init 
init_btrfs_fs(void) if (err) goto free_auto_defrag; - err = btrfs_interface_init(); + err = btrfs_prelim_ref_init(); if (err) - goto free_delayed_ref; + goto free_prelim_ref; - err = register_filesystem(&btrfs_fs_type); + err = btrfs_interface_init(); if (err) - goto unregister_ioctl; + goto free_delayed_ref; btrfs_init_lockdep(); btrfs_print_info(); - btrfs_test_free_space_cache(); + + err = btrfs_run_sanity_tests(); + if (err) + goto unregister_ioctl; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: + btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1777,6 +1863,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); + btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h new file mode 100644 index 000000000000..580877625776 --- /dev/null +++ b/fs/btrfs/tests/btrfs-tests.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_TESTS +#define __BTRFS_TESTS + +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + +#define test_msg(fmt, ...) 
pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) + +int btrfs_test_free_space_cache(void); +#else +static inline int btrfs_test_free_space_cache(void) +{ + return 0; +} +#endif + +#endif diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c new file mode 100644 index 000000000000..6fc82010dc15 --- /dev/null +++ b/fs/btrfs/tests/free-space-tests.c @@ -0,0 +1,395 @@ +/* + * Copyright (C) 2013 Fusion IO. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include <linux/slab.h> +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../free-space-cache.h" + +#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) +static struct btrfs_block_group_cache *init_test_block_group(void) +{ + struct btrfs_block_group_cache *cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) + return NULL; + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + return NULL; + } + + cache->key.objectid = 0; + cache->key.offset = 1024 * 1024 * 1024; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + cache->sectorsize = 4096; + + spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->list); + INIT_LIST_HEAD(&cache->cluster_list); + INIT_LIST_HEAD(&cache->new_bg_list); + + btrfs_init_free_space_ctl(cache); + + return cache; +} + +/* + * This test just does basic sanity checking, making sure we can add an extent + * entry and remove space from either end and the middle, and make sure we can + * remove space that covers adjacent extent entries. 
+ */ +static int test_extents(struct btrfs_block_group_cache *cache) +{ + int ret = 0; + + test_msg("Running extent only tests\n"); + + /* First just make sure we can remove an entire entry */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding initial extents %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing extent %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Full remove left some lingering space\n"); + return -1; + } + + /* Ok edge and middle cases now */ + ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error adding half extent %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing tail end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Error removing front end %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); + if (ret) { + test_msg("Error removing middle peice %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Still have space at the front\n"); + return -1; + } + + if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) { + test_msg("Still have space in the middle\n"); + return -1; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Still have space at the end\n"); + return -1; + } + + /* Cleanup */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +static int test_bitmaps(struct btrfs_block_group_cache *cache) +{ + u64 next_bitmap_offset; + int ret; + + test_msg("Running bitmap only tests\n"); + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create a bitmap entry %d\n", ret); + return ret; + 
} + + ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap full range %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + test_msg("Left some space in bitmap\n"); + return -1; + } + + ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to our bitmap entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove middle chunk %d\n", ret); + return ret; + } + + /* + * The first bitmap we have starts at offset 0 so the next one is just + * at the end of the first bitmap. + */ + next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + + /* Test a bit straddling two bitmaps */ + ret = test_add_free_space_entry(cache, next_bitmap_offset - + (2 * 1024 * 1024), 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space that straddles two bitmaps %d\n", + ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, next_bitmap_offset - + (1 * 1024 * 1024), 2 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), + 2 * 1024 * 1024)) { + test_msg("Left some space when removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + return 0; +} + +/* This is the high grade jackassery */ +static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) +{ + u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); + int ret; + + test_msg("Running bitmap and extent tests\n"); + + /* + * First let's do something simple, an extent at the same offset as the + * bitmap, but the free space completely in the extent and then + * completely in the bitmap. 
+ */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't create bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove extent entry %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + test_msg("Left remnants after our remove\n"); + return -1; + } + + /* Now to add back the extent entry and remove from the bitmap */ + ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't re-add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove from bitmap %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { + test_msg("Left remnants in the bitmap\n"); + return -1; + } + + /* + * Ok so a little more evil, extent entry and bitmap at the same offset, + * removing an overlapping chunk. 
+ */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add to a bitmap %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Couldn't remove overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { + test_msg("Left over peices after removing overlapping\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* Now with the extent entry offset into the bitmap */ + ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add space to the bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent to the cache %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); + if (ret) { + test_msg("Problem removing overlapping space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { + test_msg("Left something behind when removing space"); + return -1; + } + + /* + * This has blown up in the past, the extent entry starts before the + * bitmap entry, but we're trying to remove an offset that falls + * completely within the bitmap range and is in both the extent entry + * and the bitmap entry, looks like this + * + * [ extent ] + * [ bitmap ] + * [ del ] + */ + __btrfs_remove_free_space_cache(cache->free_space_ctl); + ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, + 4 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, + 5 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 
bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024); + if (ret) { + test_msg("Failed to free our space %d\n", ret); + return ret; + } + + if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024, + 5 * 1024 * 1024)) { + test_msg("Left stuff over\n"); + return -1; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + + /* + * This blew up before, we have part of the free space in a bitmap and + * then the entirety of the rest of the space in an extent. This used + * to return -EAGAIN back from btrfs_remove_free_space, make sure this + * doesn't happen. + */ + ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); + if (ret) { + test_msg("Couldn't add bitmap entry %d\n", ret); + return ret; + } + + ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); + if (ret) { + test_msg("Couldn't add extent entry %d\n", ret); + return ret; + } + + ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); + if (ret) { + test_msg("Error removing bitmap and extent overlapping %d\n", ret); + return ret; + } + + __btrfs_remove_free_space_cache(cache->free_space_ctl); + return 0; +} + +int btrfs_test_free_space_cache(void) +{ + struct btrfs_block_group_cache *cache; + int ret; + + test_msg("Running btrfs free space cache tests\n"); + + cache = init_test_block_group(); + if (!cache) { + test_msg("Couldn't run the tests\n"); + return 0; + } + + ret = test_extents(cache); + if (ret) + goto out; + ret = test_bitmaps(cache); + if (ret) + goto out; + ret = test_bitmaps_and_extents(cache); + if (ret) + goto out; +out: + __btrfs_remove_free_space_cache(cache->free_space_ctl); + kfree(cache->free_space_ctl); + kfree(cache); + test_msg("Free space cache tests finished\n"); + return ret; +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index af1931a5960d..cac4a3f76323 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, * 
them in one of two extent_io trees. This is used to make sure all of * those extents are on disk for transaction or log commit */ -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, +static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { int ret; @@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_stransid(new_root_item, 0); btrfs_set_root_rtransid(new_root_item, 0); } - new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); - new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec); btrfs_set_root_otransid(new_root_item, trans->transid); old = btrfs_lock_root_node(root); @@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b, + BTRFS_UUID_KEY_SUBVOL, objectid); + if (ret) { btrfs_abort_transaction(trans, root, ret); + goto fail; + } + if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + new_root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + objectid); + if (ret && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + } fail: pending->error = ret; dir_item_existed: @@ -1362,6 +1380,8 @@ static void update_super_roots(struct btrfs_root *root) super->root_level = root_item->level; if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; + if (root->fs_info->update_uuid_tree_gen) + 
super->uuid_tree_generation = root_item->generation; } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) @@ -1928,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); - pr_debug("btrfs: cleaner removing %llu\n", - (unsigned long long)root->objectid); + pr_debug("btrfs: cleaner removing %llu\n", root->objectid); btrfs_kill_all_delayed_nodes(root); @@ -1942,6 +1961,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) * If we encounter a transaction abort during snapshot cleaning, we * don't want to crash here */ - BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); - return 1; + return (ret < 0) ? 0 : 1; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index defbc4269897..5c2af8491621 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, void btrfs_throttle(struct btrfs_root *root); int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages, int mark); int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_wait_marked_extents(struct btrfs_root *root, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ff60d8978ae2..0d9613c3f5e5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (ret) goto out; - btrfs_run_delayed_items(trans, root); + else + ret = btrfs_run_delayed_items(trans, root); out: kfree(name); iput(inode); @@ -923,7 +924,9 @@ again: kfree(victim_name); if (ret) return ret; - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); + if (ret) + 
return ret; *search_done = 1; goto again; } @@ -990,7 +993,9 @@ again: inode, victim_name, victim_name_len); - btrfs_run_delayed_items(trans, root); + if (!ret) + ret = btrfs_run_delayed_items( + trans, root); } iput(victim_parent); kfree(victim_name); @@ -1536,8 +1541,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); - if (!name) - return -ENOMEM; + if (!name) { + ret = -ENOMEM; + goto out; + } log_type = btrfs_dir_type(eb, di); read_extent_buffer(eb, name, (unsigned long)(di + 1), @@ -1810,7 +1817,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) - btrfs_run_delayed_items(trans, root); + ret = btrfs_run_delayed_items(trans, root); kfree(name); iput(inode); if (ret) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c new file mode 100644 index 000000000000..dd0dea3766f7 --- /dev/null +++ b/fs/btrfs/uuid-tree.c @@ -0,0 +1,358 @@ +/* + * Copyright (C) STRATO AG 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#include <linux/uuid.h> +#include <asm/unaligned.h> +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "print-tree.h" + + +static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) +{ + key->type = type; + key->objectid = get_unaligned_le64(uuid); + key->offset = get_unaligned_le64(uuid + sizeof(u64)); +} + +/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */ +static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, + u8 type, u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct extent_buffer *eb; + int slot; + u32 item_size; + unsigned long offset; + struct btrfs_key key; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -ENOENT; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + offset = btrfs_item_ptr_offset(eb, slot); + ret = -ENOENT; + + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto out; + } + while (item_size) { + __le64 data; + + read_extent_buffer(eb, &data, offset, sizeof(data)); + if (le64_to_cpu(data) == subid) { + ret = 0; + break; + } + offset += sizeof(data); + item_size -= sizeof(data); + } + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid_cpu) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + __le64 subid_le; + + ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu); + if (ret != -ENOENT) + return ret; + + if 
(WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_insert_empty_item(trans, uuid_root, path, &key, + sizeof(subid_le)); + if (ret >= 0) { + /* Add an item for the type for the first time */ + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + } else if (ret == -EEXIST) { + /* + * An item with that type already exists. + * Extend the item and store the new subid at the end. + */ + btrfs_extend_item(uuid_root, path, sizeof(subid_le)); + eb = path->nodes[0]; + slot = path->slots[0]; + offset = btrfs_item_ptr_offset(eb, slot); + offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); + } else if (ret < 0) { + pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n", + ret, (unsigned long long)key.objectid, + (unsigned long long)key.offset, type); + goto out; + } + + ret = 0; + subid_le = cpu_to_le64(subid_cpu); + write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le)); + btrfs_mark_buffer_dirty(eb); + +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans, + struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + int ret; + struct btrfs_path *path = NULL; + struct btrfs_key key; + struct extent_buffer *eb; + int slot; + unsigned long offset; + u32 item_size; + unsigned long move_dst; + unsigned long move_src; + unsigned long move_len; + + if (WARN_ON_ONCE(!uuid_root)) { + ret = -EINVAL; + goto out; + } + + btrfs_uuid_to_key(uuid, type, &key); + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); + if (ret < 0) { + pr_warn("btrfs: error %d while searching for uuid item!\n", + ret); + goto out; + } + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + eb = path->nodes[0]; + slot = path->slots[0]; + offset 
= btrfs_item_ptr_offset(eb, slot); + item_size = btrfs_item_size_nr(eb, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + ret = -ENOENT; + goto out; + } + while (item_size) { + __le64 read_subid; + + read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid)); + if (le64_to_cpu(read_subid) == subid) + break; + offset += sizeof(read_subid); + item_size -= sizeof(read_subid); + } + + if (!item_size) { + ret = -ENOENT; + goto out; + } + + item_size = btrfs_item_size_nr(eb, slot); + if (item_size == sizeof(subid)) { + ret = btrfs_del_item(trans, uuid_root, path); + goto out; + } + + move_dst = offset; + move_src = offset + sizeof(subid); + move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); + memmove_extent_buffer(eb, move_dst, move_src, move_len); + btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1); + +out: + btrfs_free_path(path); + return ret; +} + +static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type, + u64 subid) +{ + struct btrfs_trans_handle *trans; + int ret; + + /* 1 - for the uuid item */ + trans = btrfs_start_transaction(uuid_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid); + btrfs_end_transaction(trans, uuid_root); + +out: + return ret; +} + +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, + int (*check_func)(struct btrfs_fs_info *, u8 *, u8, + u64)) +{ + struct btrfs_root *root = fs_info->uuid_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path; + int ret = 0; + struct extent_buffer *leaf; + int slot; + u32 item_size; + unsigned long offset; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = 0; + key.offset = 0; + max_key.objectid = (u64)-1; + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + 
+again_search_slot: + path->keep_locks = 1; + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + goto out; + } + + while (1) { + cond_resched(); + leaf = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + if (key.type != BTRFS_UUID_KEY_SUBVOL && + key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto skip; + + offset = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(leaf, slot); + if (!IS_ALIGNED(item_size, sizeof(u64))) { + pr_warn("btrfs: uuid item with illegal size %lu!\n", + (unsigned long)item_size); + goto skip; + } + while (item_size) { + u8 uuid[BTRFS_UUID_SIZE]; + __le64 subid_le; + u64 subid_cpu; + + put_unaligned_le64(key.objectid, uuid); + put_unaligned_le64(key.offset, uuid + sizeof(u64)); + read_extent_buffer(leaf, &subid_le, offset, + sizeof(subid_le)); + subid_cpu = le64_to_cpu(subid_le); + ret = check_func(fs_info, uuid, key.type, subid_cpu); + if (ret < 0) + goto out; + if (ret > 0) { + btrfs_release_path(path); + ret = btrfs_uuid_iter_rem(root, uuid, key.type, + subid_cpu); + if (ret == 0) { + /* + * this might look inefficient, but the + * justification is that it is an + * exception that check_func returns 1, + * and that in the regular case only one + * entry per UUID exists. 
+ */ + goto again_search_slot; + } + if (ret < 0 && ret != -ENOENT) + goto out; + } + item_size -= sizeof(subid_le); + offset += sizeof(subid_le); + } + +skip: + ret = btrfs_next_item(root, path); + if (ret == 0) + continue; + else if (ret > 0) + ret = 0; + break; + } + +out: + btrfs_free_path(path); + if (ret) + pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret); + return 0; +} diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 67a085381845..0052ca8264d9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,6 +26,7 @@ #include <linux/ratelimit.h> #include <linux/kthread.h> #include <linux/raid/pq.h> +#include <linux/semaphore.h> #include <asm/div64.h> #include "compat.h" #include "ctree.h" @@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. 
+ */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = 
btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); device->fs_devices = fs_devices; - fs_devices->num_devices++; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); if (!name) @@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, 
sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* @@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) } rcu_assign_pointer(device->name, name); - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; + if (device->missing) + fs_devices->missing_devices--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } @@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = p + (bytenr & 
~PAGE_CACHE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || - disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + btrfs_super_magic(disk_super) != BTRFS_MAGIC) goto error_unmap; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_INFO "device fsid %pU ", disk_super->fsid); } - printk(KERN_CONT "devid %llu transid %llu %s\n", - (unsigned long long)devid, (unsigned long long)transid, path); + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (!ret && fs_devices_ret) @@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), - BTRFS_UUID_SIZE); + btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); @@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, 
path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_bandwidth(leaf, dev_item, 0); btrfs_set_device_start_offset(leaf, dev_item, 0); - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - ptr = (unsigned long)btrfs_device_fsid(dev_item); + ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) clear_super = true; } + mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all - * the device supers. + * the device supers. Whoever is writing all supers, should + * lock the device list mutex before getting the number of + * devices in the super block (super_copy). Conversely, + * whoever updates the number of devices in the super block + * (super_copy) should hold the device list mutex. 
*/ cur_devices = device->fs_devices; @@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->open_devices--; call_rcu(&device->rcu, free_device); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; @@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { @@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); @@ -1889,11 +1930,9 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned 
long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); @@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *devices; struct rcu_string *name; + u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; @@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = 
btrfs_alloc_device(NULL, &devid, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2971,10 +2997,6 @@ again: if (found_key.objectid != key.objectid) break; - /* chunk zero is special */ - if (found_key.offset == 0) - break; - chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); if (!counting) { @@ -3010,6 +3032,8 @@ again: spin_unlock(&fs_info->balance_lock); } loop: + if (found_key.offset == 0) + break; key.offset = found_key.offset - 1; } @@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, - struct btrfs_ioctl_balance_args *bargs); - /* * Should be called with both balance and volume mutexes held */ @@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->data.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "data profile %llu\n", - (unsigned long long)bctl->data.target); + bctl->data.target); ret = -EINVAL; goto out; } @@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->meta.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "metadata profile %llu\n", - (unsigned long long)bctl->meta.target); + bctl->meta.target); ret = -EINVAL; goto out; } @@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->sys.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start 
balance with target " "system profile %llu\n", - (unsigned long long)bctl->sys.target); + bctl->sys.target); ret = -EINVAL; goto out; } @@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +static int btrfs_uuid_scan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = data; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path = NULL; + int ret = 0; + struct extent_buffer *eb; + int slot; + struct btrfs_root_item root_item; + u32 item_size; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + max_key.objectid = (u64)-1; + max_key.type = BTRFS_ROOT_ITEM_KEY; + max_key.offset = (u64)-1; + + path->keep_locks = 1; + + while (1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + break; + } + + if (key.type != BTRFS_ROOT_ITEM_KEY || + (key.objectid < BTRFS_FIRST_FREE_OBJECTID && + key.objectid != BTRFS_FS_TREE_OBJECTID) || + key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + if (item_size < sizeof(root_item)) + goto skip; + + read_extent_buffer(eb, &root_item, + btrfs_item_ptr_offset(eb, slot), + (int)sizeof(root_item)); + if (btrfs_root_refs(&root_item) == 0) + goto skip; + + if (!btrfs_is_empty_uuid(root_item.uuid) || + !btrfs_is_empty_uuid(root_item.received_uuid)) { + if (trans) + goto update_tree; + + btrfs_release_path(path); + /* + * 1 - subvol uuid item + * 1 - received_subvol uuid item + */ + trans = btrfs_start_transaction(fs_info->uuid_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + continue; + } else { + goto skip; + } +update_tree: + if (!btrfs_is_empty_uuid(root_item.uuid)) { + ret = btrfs_uuid_tree_add(trans, 
fs_info->uuid_root, + root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + break; + } + } + + if (!btrfs_is_empty_uuid(root_item.received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + break; + } + } + +skip: + if (trans) { + ret = btrfs_end_transaction(trans, fs_info->uuid_root); + trans = NULL; + if (ret) + break; + } + + btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + } else if (key.objectid < (u64)-1) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.objectid++; + } else { + break; + } + cond_resched(); + } + +out: + btrfs_free_path(path); + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fs_info->uuid_root); + if (ret) + pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + else + fs_info->update_uuid_tree_gen = 1; + up(&fs_info->uuid_tree_rescan_sem); + return 0; +} + +/* + * Callback for btrfs_uuid_tree_iterate(). + * returns: + * 0 check succeeded, the entry is not outdated. + * < 0 if an error occurred. + * > 0 if the check failed, which means the caller shall remove the entry. 
+ */ +static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, + u8 *uuid, u8 type, u64 subid) +{ + struct btrfs_key key; + int ret = 0; + struct btrfs_root *subvol_root; + + if (type != BTRFS_UUID_KEY_SUBVOL && + type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto out; + + key.objectid = subid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(subvol_root)) { + ret = PTR_ERR(subvol_root); + if (ret == -ENOENT) + ret = 1; + goto out; + } + + switch (type) { + case BTRFS_UUID_KEY_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) + ret = 1; + break; + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.received_uuid, + BTRFS_UUID_SIZE)) + ret = 1; + break; + } + +out: + return ret; +} + +static int btrfs_uuid_rescan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; + int ret; + + /* + * 1st step is to iterate through the existing UUID tree and + * to delete all entries that contain outdated data. + * 2nd step is to add all missing entries to the UUID tree. 
+ */ + ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); + if (ret < 0) { + pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return ret; + } + return btrfs_uuid_scan_kthread(data); +} + +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *uuid_root; + struct task_struct *task; + int ret; + + /* + * 1 - root node + * 1 - root item + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + uuid_root = btrfs_create_tree(trans, fs_info, + BTRFS_UUID_TREE_OBJECTID); + if (IS_ERR(uuid_root)) { + btrfs_abort_transaction(trans, tree_root, + PTR_ERR(uuid_root)); + return PTR_ERR(uuid_root); + } + + fs_info->uuid_root = uuid_root; + + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_scan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct task_struct *task; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_rescan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. 
@@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { - btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); return 1; @@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!em) { btrfs_crit(fs_info, "unable to find logical %llu len %llu", - (unsigned long long)logical, - (unsigned long long)*length); + logical, *length); return -EINVAL; } @@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); if (!bbio) { + kfree(raid_map); ret = -ENOMEM; goto out; } @@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (map_length < length) { btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - (unsigned long long)logical, - (unsigned long long)length, - (unsigned long long)map_length); + logical, length, map_length); BUG(); } @@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + 
device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (!devid && !fs_info) { + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + dev->work.func = pending_bios_fn; + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); } @@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root, u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned 
long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { @@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - btrfs_warn(root->fs_info, "devid %llu missing", - (unsigned long long)devid); + btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; @@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) mutex_lock(&uuid_mutex); lock_chunks(root); - /* first we search for all of the device items, and then we - * read in all of the chunk items. This way we can create chunk - * mappings that reference all of the devices that are afound + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). 
*/ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.offset = 0; key.type = 0; -again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto error; @@ -5667,17 +5991,13 @@ again: break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) - break; - if (found_key.type == BTRFS_DEV_ITEM_KEY) { - struct btrfs_dev_item *dev_item; - dev_item = btrfs_item_ptr(leaf, slot, + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, leaf, dev_item); - if (ret) - goto error; - } + ret = read_one_dev(root, leaf, dev_item); + if (ret) + goto error; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -5687,11 +6007,6 @@ again: } path->slots[0]++; } - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - key.objectid = 0; - btrfs_release_path(path); - goto again; - } ret = 0; error: unlock_chunks(root); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 86705583480d..b72f540c8b29 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -152,6 +152,8 @@ struct btrfs_fs_devices { int rotating; }; +#define BTRFS_BIO_INLINE_CSUM_SIZE 64 + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). @@ -161,9 +163,14 @@ struct btrfs_fs_devices { * we allocate are actually btrfs_io_bios. We'll cram as much of * struct btrfs_bio as we can into this over time. 
*/ +typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); struct btrfs_io_bio { unsigned long mirror_num; unsigned long stripe_index; + u8 *csum; + u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; + u8 *csum_allocated; + btrfs_io_bio_end_io_t *end_io; struct bio bio; }; @@ -298,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid); int btrfs_rm_device(struct btrfs_root *root, char *device_path); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); @@ -315,6 +325,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index aa0d68b086eb..1964d212ab08 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \ cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ - readdir.o ioctl.o sess.o export.o smb1ops.o + readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o cifs-$(CONFIG_CIFS_ACL) += cifsacl.o diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index fe8d6276410a..d8eac3b6cefb 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -91,6 +91,8 @@ extern 
__le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, #endif /* CONFIG_CIFS_SMB2 */ #endif +wchar_t cifs_toupper(wchar_t in); + /* * UniStrcat: Concatenate the second string to the first * diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 85ea98d139fc..a16b4e58bcc6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -255,6 +255,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; + cifs_inode->epoch = 0; #ifdef CONFIG_CIFS_SMB2 get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE); #endif @@ -357,6 +358,18 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) seq_printf(s, "loose"); } +static void +cifs_show_nls(struct seq_file *s, struct nls_table *cur) +{ + struct nls_table *def; + + /* Display iocharset= option if it's not default charset */ + def = load_nls_default(); + if (def != cur) + seq_printf(s, ",iocharset=%s", cur->charset); + unload_nls(def); +} + /* * cifs_show_options() is for displaying mount options in /proc/mounts. * Not all settable options are displayed but most of the important @@ -418,6 +431,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); + + cifs_show_nls(s, cifs_sb->local_nls); + if (tcon->seal) seq_printf(s, ",seal"); if (tcon->nocase) @@ -718,7 +734,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, written = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (CIFS_I(inode)->clientCanCacheAll) + if (CIFS_CACHE_WRITE(CIFS_I(inode))) return written; rc = filemap_fdatawrite(inode->i_mapping); @@ -743,7 +759,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) * We need to be sure that all dirty pages are written and the * server has the newest file length. 
*/ - if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && + if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); if (rc) { @@ -767,8 +783,10 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) { - /* note that this is called by vfs setlease with i_lock held - to protect *lease from going away */ + /* + * Note that this is called by vfs setlease with i_lock held to + * protect *lease from going away. + */ struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; @@ -776,20 +794,19 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) return -EINVAL; /* check if file is oplocked */ - if (((arg == F_RDLCK) && - (CIFS_I(inode)->clientCanCacheRead)) || - ((arg == F_WRLCK) && - (CIFS_I(inode)->clientCanCacheAll))) + if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || + ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) return generic_setlease(file, arg, lease); else if (tlink_tcon(cfile->tlink)->local_lease && - !CIFS_I(inode)->clientCanCacheRead) - /* If the server claims to support oplock on this - file, then we still need to check oplock even - if the local_lease mount option is set, but there - are servers which do not support oplock for which - this mount option may be useful if the user - knows that the file won't be changed on the server - by anyone else */ + !CIFS_CACHE_READ(CIFS_I(inode))) + /* + * If the server claims to support oplock on this file, then we + * still need to check oplock even if the local_lease mount + * option is set, but there are servers which do not support + * oplock for which this mount option may be useful if the user + * knows that the file won't be changed on the server by anyone + * else. 
+ */ return generic_setlease(file, arg, lease); else return -EAGAIN; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 52ca861ed35e..cfa14c80ef3b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -28,6 +28,7 @@ #include "cifsacl.h" #include <crypto/internal/hash.h> #include <linux/scatterlist.h> +#include <uapi/linux/cifs/cifs_mount.h> #ifdef CONFIG_CIFS_SMB2 #include "smb2pdu.h" #endif @@ -41,12 +42,7 @@ #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 -#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) -#define MAX_SERVER_SIZE 15 -#define MAX_SHARE_SIZE 80 -#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ -#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ -#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ +#define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1) #define CIFS_MIN_RCV_POOL 4 @@ -135,6 +131,7 @@ struct cifs_secmech { /* per smb session structure/fields */ struct ntlmssp_auth { + bool sesskey_per_smbsess; /* whether session key is per smb session */ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ @@ -308,6 +305,9 @@ struct smb_version_operations { int (*create_hardlink)(const unsigned int, struct cifs_tcon *, const char *, const char *, struct cifs_sb_info *); + /* query symlink target */ + int (*query_symlink)(const unsigned int, struct cifs_tcon *, + const char *, char **, struct cifs_sb_info *); /* open a file for non-posix mounts */ int (*open)(const unsigned int, struct cifs_open_parms *, __u32 *, FILE_ALL_INFO *); @@ -361,18 +361,24 @@ struct smb_version_operations { /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); /* get lease key of the inode */ - void (*get_lease_key)(struct inode *, struct cifs_fid *fid); + void 
(*get_lease_key)(struct inode *, struct cifs_fid *); /* set lease key of the inode */ - void (*set_lease_key)(struct inode *, struct cifs_fid *fid); + void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ - void (*new_lease_key)(struct cifs_fid *fid); - /* The next two functions will need to be changed to per smb session */ - void (*generate_signingkey)(struct TCP_Server_Info *server); - int (*calc_signature)(struct smb_rqst *rqst, - struct TCP_Server_Info *server); - int (*query_mf_symlink)(const unsigned char *path, char *pbuf, - unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, - unsigned int xid); + void (*new_lease_key)(struct cifs_fid *); + int (*generate_signingkey)(struct cifs_ses *); + int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *); + int (*query_mf_symlink)(const unsigned char *, char *, unsigned int *, + struct cifs_sb_info *, unsigned int); + /* if we can do cache read operations */ + bool (*is_read_op)(__u32); + /* set oplock level for the inode */ + void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, + bool *); + /* create lease context buffer for CREATE request */ + char * (*create_lease_buf)(u8 *, u8); + /* parse lease context buffer and return oplock/epoch info */ + __u8 (*parse_lease_buf)(void *, unsigned int *); }; struct smb_version_values { @@ -390,9 +396,9 @@ struct smb_version_values { unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; - unsigned int oplock_read; __u16 signing_enabled; __u16 signing_required; + size_t create_lease_size; }; #define HEADER_SIZE(server) (server->vals->header_size) @@ -548,7 +554,6 @@ struct TCP_Server_Info { int timeAdj; /* Adjust for difference in server time zone in sec */ __u64 CurrentMid; /* multiplex id - rotating counter */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ - char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ /* 16th byte of RFC1001 workstation 
name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* for signing, protected by srv_mutex */ @@ -731,6 +736,7 @@ struct cifs_ses { bool need_reconnect:1; /* connection reset, uid now invalid */ #ifdef CONFIG_CIFS_SMB2 __u16 session_flags; + char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ #endif /* CONFIG_CIFS_SMB2 */ }; @@ -935,6 +941,8 @@ struct cifs_fid { __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ #endif struct cifs_pending_open *pending_open; + unsigned int epoch; + bool purge_cache; }; struct cifs_fid_locks { @@ -1032,6 +1040,17 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); +#define CIFS_CACHE_READ_FLG 1 +#define CIFS_CACHE_HANDLE_FLG 2 +#define CIFS_CACHE_RH_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG) +#define CIFS_CACHE_WRITE_FLG 4 +#define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG) +#define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG) + +#define CIFS_CACHE_READ(cinode) (cinode->oplock & CIFS_CACHE_READ_FLG) +#define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG) +#define CIFS_CACHE_WRITE(cinode) (cinode->oplock & CIFS_CACHE_WRITE_FLG) + /* * One of these for each file inode */ @@ -1043,8 +1062,8 @@ struct cifsInodeInfo { /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; __u32 cifsAttrs; /* e.g. 
DOS archive bit, sparse, compressed, system */ - bool clientCanCacheRead; /* read oplock */ - bool clientCanCacheAll; /* read and writebehind oplock */ + unsigned int oplock; /* oplock/lease level we have */ + unsigned int epoch; /* used to track lease state changes */ bool delete_pending; /* DELETE_ON_CLOSE is set */ bool invalid_mapping; /* pagecache is invalid */ unsigned long time; /* jiffies of last update of inode */ @@ -1502,7 +1521,7 @@ extern mempool_t *cifs_mid_poolp; extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; #define SMB20_VERSION_STRING "2.0" -/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */ +extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 11ca24a8e054..948676db8e2e 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1495,11 +1495,12 @@ struct reparse_data { __u32 ReparseTag; __u16 ReparseDataLength; __u16 Reserved; - __u16 AltNameOffset; - __u16 AltNameLen; - __u16 TargetNameOffset; - __u16 TargetNameLen; - char LinkNamesBuf[1]; + __u16 SubstituteNameOffset; + __u16 SubstituteNameLength; + __u16 PrintNameOffset; + __u16 PrintNameLength; + __u32 Flags; + char PathBuffer[0]; } __attribute__((packed)); struct cifs_quota_data { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b29a012bed33..b5ec2a268f56 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -357,13 +357,9 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage); -#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL -extern int CIFSSMBQueryReparseLinkInfo(const unsigned int xid, - struct cifs_tcon *tcon, - const unsigned char *searchName, - char *symlinkinfo, const int buflen, 
__u16 fid, - const struct nls_table *nls_codepage); -#endif /* temporarily unused until cifs_symlink fixed */ +extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, + __u16 fid, char **symlinkinfo, + const struct nls_table *nls_codepage); extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int disposition, const int access_flags, const int omode, @@ -435,7 +431,7 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *); extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); extern void cifs_crypto_shash_release(struct TCP_Server_Info *); extern int calc_seckey(struct cifs_ses *); -extern void generate_smb3signingkey(struct TCP_Server_Info *); +extern int generate_smb3signingkey(struct cifs_ses *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern int calc_lanman_hash(const char *password, const char *cryptkey, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a89c4cb4e6cf..a3d74fea1623 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3067,7 +3067,6 @@ querySymLinkRetry: return rc; } -#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL /* * Recent Windows versions now create symlinks more frequently * and they use the "reparse point" mechanism below. We can of course @@ -3079,18 +3078,22 @@ querySymLinkRetry: * it is not compiled in by default until callers fixed up and more tested. 
*/ int -CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, - const unsigned char *searchName, - char *symlinkinfo, const int buflen, __u16 fid, - const struct nls_table *nls_codepage) +CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, + __u16 fid, char **symlinkinfo, + const struct nls_table *nls_codepage) { int rc = 0; int bytes_returned; struct smb_com_transaction_ioctl_req *pSMB; struct smb_com_transaction_ioctl_rsp *pSMBr; + bool is_unicode; + unsigned int sub_len; + char *sub_start; + struct reparse_data *reparse_buf; + __u32 data_offset, data_count; + char *end_of_smb; - cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n", - searchName); + cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -3119,66 +3122,55 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); - } else { /* decode response */ - __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); - __u32 data_count = le32_to_cpu(pSMBr->DataCount); - if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { - /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ - goto qreparse_out; - } - if (data_count && (data_count < 2048)) { - char *end_of_smb = 2 /* sizeof byte count */ + - get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - - struct reparse_data *reparse_buf = - (struct reparse_data *) - ((char *)&pSMBr->hdr.Protocol - + data_offset); - if ((char *)reparse_buf >= end_of_smb) { - rc = -EIO; - goto qreparse_out; - } - if ((reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset + - reparse_buf->TargetNameLen) > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } + goto qreparse_out; + } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { - 
cifs_from_ucs2(symlinkinfo, (__le16 *) - (reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset), - buflen, - reparse_buf->TargetNameLen, - nls_codepage, 0); - } else { /* ASCII names */ - strncpy(symlinkinfo, - reparse_buf->LinkNamesBuf + - reparse_buf->TargetNameOffset, - min_t(const int, buflen, - reparse_buf->TargetNameLen)); - } - } else { - rc = -EIO; - cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); - } - symlinkinfo[buflen] = 0; /* just in case so the caller - does not go off the end of the buffer */ - cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo); + data_offset = le32_to_cpu(pSMBr->DataOffset); + data_count = le32_to_cpu(pSMBr->DataCount); + if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { + /* BB also check enough total bytes returned */ + rc = -EIO; /* bad smb */ + goto qreparse_out; + } + if (!data_count || (data_count > 2048)) { + rc = -EIO; + cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); + goto qreparse_out; + } + end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; + reparse_buf = (struct reparse_data *) + ((char *)&pSMBr->hdr.Protocol + data_offset); + if ((char *)reparse_buf >= end_of_smb) { + rc = -EIO; + goto qreparse_out; } + if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset + + reparse_buf->PrintNameLength) > end_of_smb) { + cifs_dbg(FYI, "reparse buf beyond SMB\n"); + rc = -EIO; + goto qreparse_out; + } + sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer; + sub_len = reparse_buf->SubstituteNameLength; + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) + is_unicode = true; + else + is_unicode = false; + /* BB FIXME investigate remapping reserved chars here */ + *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode, + nls_codepage); + if (!*symlinkinfo) + rc = -ENOMEM; qreparse_out: cifs_buf_release(pSMB); - /* Note: On -EAGAIN error only caller can retry on handle based calls - since file handle passed in no longer valid */ - 
+ /* + * Note: On -EAGAIN error only caller can retry on handle based calls + * since file handle passed in no longer valid. + */ return rc; } -#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */ #ifdef CONFIG_CIFS_POSIX diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d67c550c4980..a279ffc0bc29 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -379,6 +379,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); @@ -390,6 +391,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; @@ -1114,7 +1116,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) break; #ifdef CONFIG_CIFS_SMB2 case Smb_20: - vol->ops = &smb21_operations; /* currently identical with 2.1 */ + vol->ops = &smb20_operations; vol->vals = &smb20_values; break; case Smb_21: @@ -1575,8 +1577,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnlen(string, MAX_USERNAME_SIZE) > - MAX_USERNAME_SIZE) { + if (strnlen(string, CIFS_MAX_USERNAME_LEN) > + CIFS_MAX_USERNAME_LEN) { printk(KERN_WARNING "CIFS: username too long\n"); goto cifs_parse_mount_err; } @@ -2221,13 +2223,13 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) /* anything else takes username/password */ if (strncmp(ses->user_name, vol->username ? vol->username : "", - MAX_USERNAME_SIZE)) + CIFS_MAX_USERNAME_LEN)) return 0; if (strlen(vol->username) != 0 && ses->password != NULL && strncmp(ses->password, vol->password ? 
vol->password : "", - MAX_PASSWORD_SIZE)) + CIFS_MAX_PASSWORD_LEN)) return 0; } return 1; @@ -2352,7 +2354,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } len = delim - payload; - if (len > MAX_USERNAME_SIZE || len <= 0) { + if (len > CIFS_MAX_USERNAME_LEN || len <= 0) { cifs_dbg(FYI, "Bad value from username search (len=%zd)\n", len); rc = -EINVAL; @@ -2369,7 +2371,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username); len = key->datalen - (len + 1); - if (len > MAX_PASSWORD_SIZE || len <= 0) { + if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) { cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len); rc = -EINVAL; kfree(vol->username); @@ -3826,33 +3828,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); - if (rc) { + if (rc) cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc); - } else { - mutex_lock(&server->srv_mutex); - if (!server->session_estab) { - server->session_key.response = ses->auth_key.response; - server->session_key.len = ses->auth_key.len; - server->sequence_number = 0x2; - server->session_estab = true; - ses->auth_key.response = NULL; - if (server->ops->generate_signingkey) - server->ops->generate_signingkey(server); - } - mutex_unlock(&server->srv_mutex); - - cifs_dbg(FYI, "CIFS Session Established successfully\n"); - spin_lock(&GlobalMid_Lock); - ses->status = CifsGood; - ses->need_reconnect = false; - spin_unlock(&GlobalMid_Lock); - } - - kfree(ses->auth_key.response); - ses->auth_key.response = NULL; - ses->auth_key.len = 0; - kfree(ses->ntlmssp); - ses->ntlmssp = NULL; return rc; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d62ce0d48141..5384c2a640ca 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -32,6 +32,7 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" +#include "cifs_unicode.h" static void 
renew_parental_timestamps(struct dentry *direntry) @@ -499,6 +500,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); + fput(file); rc = -ENOMEM; } @@ -834,12 +836,17 @@ static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q) { struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; - int i; + wchar_t c; + int i, charlen; hash = init_name_hash(); - for (i = 0; i < q->len; i++) - hash = partial_name_hash(nls_tolower(codepage, q->name[i]), - hash); + for (i = 0; i < q->len; i += charlen) { + charlen = codepage->char2uni(&q->name[i], q->len - i, &c); + /* error out if we can't convert the character */ + if (unlikely(charlen < 0)) + return charlen; + hash = partial_name_hash(cifs_toupper(c), hash); + } q->hash = end_name_hash(hash); return 0; @@ -849,11 +856,47 @@ static int cifs_ci_compare(const struct dentry *parent, const struct dentry *den unsigned int len, const char *str, const struct qstr *name) { struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; + wchar_t c1, c2; + int i, l1, l2; - if ((name->len == len) && - (nls_strnicmp(codepage, name->name, str, len) == 0)) - return 0; - return 1; + /* + * We make the assumption here that uppercase characters in the local + * codepage are always the same length as their lowercase counterparts. + * + * If that's ever not the case, then this will fail to match it. + */ + if (name->len != len) + return 1; + + for (i = 0; i < len; i += l1) { + /* Convert characters in both strings to UTF-16. */ + l1 = codepage->char2uni(&str[i], len - i, &c1); + l2 = codepage->char2uni(&name->name[i], name->len - i, &c2); + + /* + * If we can't convert either character, just declare it to + * be 1 byte long and compare the original byte. 
+ */ + if (unlikely(l1 < 0 && l2 < 0)) { + if (str[i] != name->name[i]) + return 1; + l1 = 1; + continue; + } + + /* + * Here, we again ass|u|me that upper/lowercase versions of + * a character are the same length in the local NLS. + */ + if (l1 != l2) + return 1; + + /* Now compare uppercase versions of these characters */ + if (cifs_toupper(c1) != cifs_toupper(c2)) + return 1; + } + + return 0; } const struct dentry_operations cifs_ci_dentry_ops = { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9d0dd952ad79..eb955b525e55 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -313,8 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. */ - if (oplock == server->vals->oplock_read && - cifs_has_mand_locks(cinode)) { + if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); oplock = 0; } @@ -324,6 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, oplock = fid->pending_open->oplock; list_del(&fid->pending_open->olist); + fid->purge_cache = false; server->ops->set_fid(cfile, fid, oplock); list_add(&cfile->tlist, &tcon->openFileList); @@ -334,6 +334,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, list_add_tail(&cfile->flist, &cinode->openFileList); spin_unlock(&cifs_file_list_lock); + if (fid->purge_cache) + cifs_invalidate_mapping(inode); + file->private_data = cfile; return cfile; } @@ -1524,12 +1527,12 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, * read won't conflict with non-overlapted locks due to * pagereading. 
*/ - if (!CIFS_I(inode)->clientCanCacheAll && - CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && + CIFS_CACHE_READ(CIFS_I(inode))) { cifs_invalidate_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", inode); - CIFS_I(inode)->clientCanCacheRead = false; + CIFS_I(inode)->oplock = 0; } rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, @@ -2213,7 +2216,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", file->f_path.dentry->d_name.name, datasync); - if (!CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_invalidate_mapping(inode); if (rc) { cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); @@ -2577,7 +2580,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); ssize_t written; - if (cinode->clientCanCacheAll) { + if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) @@ -2591,7 +2594,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * these pages but not on the region from pos to ppos+len-1. 
*/ written = cifs_user_writev(iocb, iov, nr_segs, pos); - if (written > 0 && cinode->clientCanCacheRead) { + if (written > 0 && CIFS_CACHE_READ(cinode)) { /* * Windows 7 server can delay breaking level2 oplock if a write * request comes - break it on the client to prevent reading @@ -2600,7 +2603,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, cifs_invalidate_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", inode); - cinode->clientCanCacheRead = false; + cinode->oplock = 0; } return written; } @@ -2957,7 +2960,7 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, * on pages affected by this read but not on the region from pos to * pos+len-1. */ - if (!cinode->clientCanCacheRead) + if (!CIFS_CACHE_READ(cinode)) return cifs_user_readv(iocb, iov, nr_segs, pos); if (cap_unix(tcon->ses) && @@ -3093,7 +3096,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) xid = get_xid(); - if (!CIFS_I(inode)->clientCanCacheRead) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_invalidate_mapping(inode); if (rc) return rc; @@ -3376,6 +3379,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, return rc; } +/* + * cifs_readpage_worker must be called with the page pinned + */ static int cifs_readpage_worker(struct file *file, struct page *page, loff_t *poffset) { @@ -3387,7 +3393,6 @@ static int cifs_readpage_worker(struct file *file, struct page *page, if (rc == 0) goto read_complete; - page_cache_get(page); read_data = kmap(page); /* for reads over a certain size could initiate async read ahead */ @@ -3414,7 +3419,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, io_error: kunmap(page); - page_cache_release(page); + unlock_page(page); read_complete: return rc; @@ -3439,8 +3444,6 @@ static int cifs_readpage(struct file *file, struct page *page) rc = cifs_readpage_worker(file, page, &offset); - unlock_page(page); - free_xid(xid); 
return rc; } @@ -3494,6 +3497,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int oncethru = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; loff_t offset = pos & (PAGE_CACHE_SIZE - 1); loff_t page_start = pos & PAGE_MASK; @@ -3503,6 +3507,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); +start: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { rc = -ENOMEM; @@ -3526,7 +3531,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, * is, when the page lies beyond the EOF, or straddles the EOF * and the write will cover all of the existing data. */ - if (CIFS_I(mapping->host)->clientCanCacheRead) { + if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { i_size = i_size_read(mapping->host); if (page_start >= i_size || (offset == 0 && (pos + len) >= i_size)) { @@ -3544,13 +3549,16 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, } } - if ((file->f_flags & O_ACCMODE) != O_WRONLY) { + if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { /* * might as well read a page, it is fast enough. If we get * an error, we don't need to return it. cifs_write_end will * do a sync write instead since PG_uptodate isn't set. 
*/ cifs_readpage_worker(file, page, &page_start); + page_cache_release(page); + oncethru = 1; + goto start; } else { /* we could try using another file handle if there is one - but how would we lock it to prevent close of that handle @@ -3609,20 +3617,20 @@ void cifs_oplock_break(struct work_struct *work) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead && + if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", inode); - cinode->clientCanCacheRead = false; + cinode->oplock = 0; } if (inode && S_ISREG(inode->i_mode)) { - if (cinode->clientCanCacheRead) + if (CIFS_CACHE_READ(cinode)) break_lease(inode, O_RDONLY); else break_lease(inode, O_WRONLY); rc = filemap_fdatawrite(inode->i_mapping); - if (cinode->clientCanCacheRead == 0) { + if (!CIFS_CACHE_READ(cinode)) { rc = filemap_fdatawait(inode->i_mapping); mapping_set_error(inode->i_mapping, rc); cifs_invalidate_mapping(inode); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 449b6cf09b09..f9ff9c173f78 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -101,7 +101,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) } /* don't bother with revalidation if we have an oplock */ - if (cifs_i->clientCanCacheRead) { + if (CIFS_CACHE_READ(cifs_i)) { cifs_dbg(FYI, "%s: inode %llu is oplocked\n", __func__, cifs_i->uniqueid); return; @@ -549,6 +549,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, * when Unix extensions are disabled - fake it. 
*/ fattr->cf_nlink = 2; + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -646,7 +650,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_dbg(FYI, "Getting info on %s\n", full_path); if ((data == NULL) && (*inode != NULL)) { - if (CIFS_I(*inode)->clientCanCacheRead) { + if (CIFS_CACHE_READ(CIFS_I(*inode))) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); goto cgii_exit; } @@ -1657,7 +1661,7 @@ cifs_inode_needs_reval(struct inode *inode) struct cifsInodeInfo *cifs_i = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_i->clientCanCacheRead) + if (CIFS_CACHE_READ(cifs_i)) return false; if (!lookupCacheEnabled) @@ -1800,7 +1804,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, * We need to be sure that all dirty pages are written and the server * has actual ctime, mtime and file length. 
*/ - if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping && + if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); if (rc) { @@ -1852,14 +1856,11 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) static void cifs_setsize(struct inode *inode, loff_t offset) { - loff_t oldsize; - spin_lock(&inode->i_lock); - oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); - truncate_pagecache(inode, oldsize, offset); + truncate_pagecache(inode, offset); } static int diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 562044f700e5..7e36ceba0c7a 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -509,6 +509,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon; + struct TCP_Server_Info *server; xid = get_xid(); @@ -519,25 +520,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) goto out; } tcon = tlink_tcon(tlink); - - /* - * For now, we just handle symlinks with unix extensions enabled. - * Eventually we should handle NTFS reparse points, and MacOS - * symlink support. For instance... - * - * rc = CIFSSMBQueryReparseLinkInfo(...) - * - * For now, just return -EACCES when the server doesn't support posix - * extensions. Note that we still allow querying symlinks when posix - * extensions are manually disabled. We could disable these as well - * but there doesn't seem to be any harm in allowing the client to - * read them. 
- */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && - !cap_unix(tcon->ses)) { - rc = -EACCES; - goto out; - } + server = tcon->ses->server; full_path = build_path_from_dentry(direntry); if (!full_path) @@ -559,6 +542,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) if ((rc != 0) && cap_unix(tcon->ses)) rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, cifs_sb->local_nls); + else if (rc != 0 && server->ops->query_symlink) + rc = server->ops->query_symlink(xid, tcon, full_path, + &target_path, cifs_sb); kfree(full_path); out: diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f7d4b2285efe..138a011633fe 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -105,6 +105,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) } kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); + kfree(buf_to_free->auth_key.response); kfree(buf_to_free); } @@ -545,19 +546,15 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) oplock &= 0xF; if (oplock == OPLOCK_EXCLUSIVE) { - cinode->clientCanCacheAll = true; - cinode->clientCanCacheRead = true; + cinode->oplock = CIFS_CACHE_WRITE_FLG | CIFS_CACHE_READ_FLG; cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", &cinode->vfs_inode); } else if (oplock == OPLOCK_READ) { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = true; + cinode->oplock = CIFS_CACHE_READ_FLG; cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", &cinode->vfs_inode); - } else { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = false; - } + } else + cinode->oplock = 0; } bool diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 69d2c826a23b..42ef03be089f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -172,6 +172,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (cifs_dfs_is_possible(cifs_sb) && (fattr->cf_cifsattrs & ATTR_REPARSE)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + } else if (fattr->cf_cifsattrs & 
ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 08dd37bb23aa..5f99b7f19e78 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -226,7 +226,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, *(bcc_ptr+1) = 0; } else { bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); } bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ @@ -246,8 +246,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* BB what about null user mounts - check that we do this BB */ /* copy user */ if (ses->user_name != NULL) { - strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN); + bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); } /* else null user mount */ *bcc_ptr = 0; @@ -428,7 +428,8 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab) + if (!ses->server->session_estab || + ses->ntlmssp->sesskey_per_smbsess) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } @@ -466,7 +467,8 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->sign) { flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab) + if (!ses->server->session_estab || + ses->ntlmssp->sesskey_per_smbsess) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } @@ -501,7 +503,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 
bytes each */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); @@ -517,7 +519,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, } else { int len; len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + CIFS_MAX_USERNAME_LEN, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); @@ -629,7 +631,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { - cifs_dbg(VFS, "Unable to select appropriate authentication method!"); + cifs_dbg(VFS, + "Unable to select appropriate authentication method!"); return -EINVAL; } @@ -640,6 +643,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); if (!ses->ntlmssp) return -ENOMEM; + ses->ntlmssp->sesskey_per_smbsess = false; + } ssetup_ntlmssp_authenticate: @@ -815,8 +820,9 @@ ssetup_ntlmssp_authenticate: ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { - cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", - msg->sesskey_len); + cifs_dbg(VFS, + "Kerberos can't allocate (%u bytes) memory", + msg->sesskey_len); rc = -ENOMEM; goto ssetup_exit; } @@ -1005,5 +1011,37 @@ ssetup_exit: if ((phase == NtLmChallenge) && (rc == 0)) goto ssetup_ntlmssp_authenticate; + if (!rc) { + mutex_lock(&ses->server->srv_mutex); + if (!ses->server->session_estab) { + if (ses->server->sign) { + ses->server->session_key.response = + kmemdup(ses->auth_key.response, + ses->auth_key.len, GFP_KERNEL); + if (!ses->server->session_key.response) { + rc = -ENOMEM; + mutex_unlock(&ses->server->srv_mutex); + goto keycp_exit; + } + ses->server->session_key.len = + ses->auth_key.len; + } + 
ses->server->sequence_number = 0x2; + ses->server->session_estab = true; + } + mutex_unlock(&ses->server->srv_mutex); + + cifs_dbg(FYI, "CIFS session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + } + +keycp_exit: + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + kfree(ses->ntlmssp); + return rc; } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 60943978aec3..8233b174de3d 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -700,7 +700,7 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); cfile->fid.netfid = fid->netfid; cifs_set_oplock_level(cinode, oplock); - cinode->can_cache_brlcks = cinode->clientCanCacheAll; + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } static void @@ -837,7 +837,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, { return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, - cinode->clientCanCacheRead ? 1 : 0); + CIFS_CACHE_READ(cinode) ? 
1 : 0); } static int @@ -881,6 +881,43 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset, (__u8)type, wait, 0); } +static int +cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + int rc; + int oplock = 0; + __u16 netfid; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + + rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + FILE_READ_ATTRIBUTES, OPEN_REPARSE_POINT, &netfid, + &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) + return rc; + + rc = CIFSSMBQuerySymLink(xid, tcon, netfid, target_path, + cifs_sb->local_nls); + if (rc) { + CIFSSMBClose(xid, tcon, netfid); + return rc; + } + + convert_delimiter(*target_path, '/'); + CIFSSMBClose(xid, tcon, netfid); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + return rc; +} + +static bool +cifs_is_read_op(__u32 oplock) +{ + return oplock == OPLOCK_READ; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -927,6 +964,7 @@ struct smb_version_operations smb1_operations = { .rename_pending_delete = cifs_rename_pending_delete, .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, + .query_symlink = cifs_query_symlink, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, @@ -945,6 +983,7 @@ struct smb_version_operations smb1_operations = { .mand_unlock_range = cifs_unlock_range, .push_mand_locks = cifs_push_mandatory_locks, .query_mf_symlink = open_query_close_cifs_symlink, + .is_read_op = cifs_is_read_op, }; struct smb_version_values smb1_values = { @@ -960,7 +999,6 @@ struct smb_version_values smb1_values = { .cap_unix = CAP_UNIX, .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, .cap_large_files = CAP_LARGE_FILES, - .oplock_read = OPLOCK_READ, .signing_enabled = SECMODE_SIGN_ENABLED, .signing_required = 
SECMODE_SIGN_REQUIRED, }; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 04a81a4142c3..3f17b4550831 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -34,29 +34,6 @@ #include "fscache.h" #include "smb2proto.h" -void -smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) -{ - oplock &= 0xFF; - if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) - return; - if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE || - oplock == SMB2_OPLOCK_LEVEL_BATCH) { - cinode->clientCanCacheAll = true; - cinode->clientCanCacheRead = true; - cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", - &cinode->vfs_inode); - } else if (oplock == SMB2_OPLOCK_LEVEL_II) { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = true; - cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", - &cinode->vfs_inode); - } else { - cinode->clientCanCacheAll = false; - cinode->clientCanCacheRead = false; - } -} - int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, FILE_ALL_INFO *buf) @@ -86,7 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data); + rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL); if (rc) goto out; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index c6ec1633309a..78ff88c467b9 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -60,7 +60,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); if (rc) { kfree(utf16_path); return rc; @@ -136,7 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, 
full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data, + FILE_READ_ATTRIBUTES, FILE_OPEN, + OPEN_REPARSE_POINT, smb2_data, SMB2_OP_QUERY_INFO); if (rc) goto out; @@ -191,8 +192,8 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - CREATE_DELETE_ON_CLOSE, NULL, - SMB2_OP_DELETE); + CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, + NULL, SMB2_OP_DELETE); } static int diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index b0c43345cd98..fb3966265b6e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -171,6 +171,10 @@ smb2_check_message(char *buf, unsigned int length) if (4 + len != clc_len) { cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n", clc_len, 4 + len, mid); + /* create failed on symlink */ + if (command == SMB2_CREATE_HE && + hdr->Status == STATUS_STOPPED_ON_SYMLINK) + return 0; /* Windows 7 server returns 24 bytes more */ if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) return 0; @@ -376,23 +380,15 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode) { - if (cinode->clientCanCacheAll) - return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; - else if (cinode->clientCanCacheRead) - return SMB2_LEASE_READ_CACHING; - return 0; -} - -__u8 smb2_map_lease_to_oplock(__le32 lease_state) -{ - if (lease_state & SMB2_LEASE_WRITE_CACHING) { - if (lease_state & SMB2_LEASE_HANDLE_CACHING) - return SMB2_OPLOCK_LEVEL_BATCH; - else - return SMB2_OPLOCK_LEVEL_EXCLUSIVE; - } else if (lease_state & SMB2_LEASE_READ_CACHING) - return SMB2_OPLOCK_LEVEL_II; - return 0; + __le32 lease = 0; + + if (CIFS_CACHE_WRITE(cinode)) + lease |= SMB2_LEASE_WRITE_CACHING; + if (CIFS_CACHE_HANDLE(cinode)) + lease |= SMB2_LEASE_HANDLE_CACHING; + if (CIFS_CACHE_READ(cinode)) + lease |= SMB2_LEASE_READ_CACHING; + return lease; } struct 
smb2_lease_break_work { @@ -417,96 +413,109 @@ cifs_ses_oplock_break(struct work_struct *work) } static bool -smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, + struct smb2_lease_break_work *lw) { - struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct list_head *tmp, *tmp1, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - struct cifsInodeInfo *cinode; + bool found; + __u8 lease_state; + struct list_head *tmp; struct cifsFileInfo *cfile; + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_pending_open *open; - struct smb2_lease_break_work *lw; - bool found; + struct cifsInodeInfo *cinode; int ack_req = le32_to_cpu(rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); - lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); - if (!lw) - return false; + lease_state = le32_to_cpu(rsp->NewLeaseState); - INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); - lw->lease_state = rsp->NewLeaseState; + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + cinode = CIFS_I(cfile->dentry->d_inode); - cifs_dbg(FYI, "Checking for lease break\n"); + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; - /* look up tcon based on tid & uid */ - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + cifs_dbg(FYI, "found in the open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - spin_lock(&cifs_file_list_lock); - list_for_each(tmp1, &ses->tcon_list) { - tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + server->ops->set_oplock_level(cinode, lease_state, 0, NULL); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); - list_for_each(tmp2, &tcon->openFileList) { - cfile = list_entry(tmp2, struct cifsFileInfo, - tlist); - 
cinode = CIFS_I(cfile->dentry->d_inode); + if (ack_req) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; - if (memcmp(cinode->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + queue_work(cifsiod_wq, &cfile->oplock_break); + kfree(lw); + return true; + } - cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, &lw->lease_break); + } - smb2_set_oplock_level(cinode, - smb2_map_lease_to_oplock(rsp->NewLeaseState)); + cifs_dbg(FYI, "found in the pending open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - if (ack_req) - cfile->oplock_break_cancelled = false; - else - cfile->oplock_break_cancelled = true; + open->oplock = lease_state; + } + return found; +} - queue_work(cifsiod_wq, &cfile->oplock_break); +static bool +smb2_is_valid_lease_break(char *buffer) +{ + struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct smb2_lease_break_work *lw; - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) + return false; - found = false; - list_for_each_entry(open, &tcon->pending_opens, olist) { - if (memcmp(open->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->lease_state = rsp->NewLeaseState; - if (!found && ack_req) { - found = true; - 
memcpy(lw->lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - lw->tlink = cifs_get_tlink(open->tlink); - queue_work(cifsiod_wq, - &lw->lease_break); - } + cifs_dbg(FYI, "Checking for lease break\n"); - cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list); - open->oplock = - smb2_map_lease_to_oplock(rsp->NewLeaseState); - } - if (found) { - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; + list_for_each(tmp1, &server->smb_ses_list) { + ses = list_entry(tmp1, struct cifs_ses, smb_ses_list); + + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp, lw)) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } } + spin_unlock(&cifs_file_list_lock); } - spin_unlock(&cifs_file_list_lock); } spin_unlock(&cifs_tcp_ses_lock); kfree(lw); @@ -532,7 +541,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer, server); + return smb2_is_valid_lease_break(buffer); else return false; } @@ -560,14 +569,15 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "file id match, oplock break\n"); cinode = CIFS_I(cfile->dentry->d_inode); - if (!cinode->clientCanCacheAll && + if (!CIFS_CACHE_WRITE(cinode) && rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE) cfile->oplock_break_cancelled = true; else cfile->oplock_break_cancelled = false; - smb2_set_oplock_level(cinode, 
- rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0); + server->ops->set_oplock_level(cinode, + rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0, + 0, NULL); queue_work(cifsiod_wq, &cfile->oplock_break); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f259e6cc8357..861b33214144 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -24,6 +24,7 @@ #include "smb2proto.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "cifs_unicode.h" #include "smb2status.h" #include "smb2glob.h" @@ -229,7 +230,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); if (rc) { kfree(utf16_path); return rc; @@ -376,10 +377,13 @@ static void smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) { struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; - smb2_set_oplock_level(cinode, oplock); - cinode->can_cache_brlcks = cinode->clientCanCacheAll; + server->ops->set_oplock_level(cinode, oplock, fid->epoch, + &fid->purge_cache); + cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); } static void @@ -463,7 +467,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); kfree(utf16_path); if (rc) { cifs_dbg(VFS, "open dir failed\n"); @@ -530,7 +534,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, return SMB2_oplock_break(0, tcon, fid->persistent_fid, fid->volatile_fid, - cinode->clientCanCacheRead ? 1 : 0); + CIFS_CACHE_READ(cinode) ? 
1 : 0); } static int @@ -550,7 +554,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL); + rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL); if (rc) return rc; buf->f_type = SMB2_MAGIC_NUMBER; @@ -596,7 +600,245 @@ smb2_new_lease_key(struct cifs_fid *fid) get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE); } -struct smb_version_operations smb21_operations = { +static int +smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, + const char *full_path, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + int rc; + __le16 *utf16_path; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct smb2_err_rsp *err_buf = NULL; + struct smb2_symlink_err_rsp *symlink; + unsigned int sub_len, sub_offset; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + + utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); + if (!utf16_path) + return -ENOMEM; + + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf); + + if (!rc || !err_buf) { + kfree(utf16_path); + return -ENOENT; + } + /* open must fail on symlink - reset rc */ + rc = 0; + symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData; + sub_len = le16_to_cpu(symlink->SubstituteNameLength); + sub_offset = le16_to_cpu(symlink->SubstituteNameOffset); + *target_path = cifs_strndup_from_utf16( + (char *)symlink->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) { + kfree(utf16_path); + return -ENOMEM; + } + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + kfree(utf16_path); + return rc; +} + +static void +smb2_set_oplock_level(struct 
cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; + if (oplock == SMB2_OPLOCK_LEVEL_BATCH) { + cinode->oplock = CIFS_CACHE_RHW_FLG; + cifs_dbg(FYI, "Batch Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { + cinode->oplock = CIFS_CACHE_RW_FLG; + cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else if (oplock == SMB2_OPLOCK_LEVEL_II) { + cinode->oplock = CIFS_CACHE_READ_FLG; + cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", + &cinode->vfs_inode); + } else + cinode->oplock = 0; +} + +static void +smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + char message[5] = {0}; + + oplock &= 0xFF; + if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) + return; + + cinode->oplock = 0; + if (oplock & SMB2_LEASE_READ_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_READ_FLG; + strcat(message, "R"); + } + if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_HANDLE_FLG; + strcat(message, "H"); + } + if (oplock & SMB2_LEASE_WRITE_CACHING_HE) { + cinode->oplock |= CIFS_CACHE_WRITE_FLG; + strcat(message, "W"); + } + if (!cinode->oplock) + strcat(message, "None"); + cifs_dbg(FYI, "%s Lease granted on inode %p\n", message, + &cinode->vfs_inode); +} + +static void +smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, + unsigned int epoch, bool *purge_cache) +{ + unsigned int old_oplock = cinode->oplock; + + smb21_set_oplock_level(cinode, oplock, epoch, purge_cache); + + if (purge_cache) { + *purge_cache = false; + if (old_oplock == CIFS_CACHE_READ_FLG) { + if (cinode->oplock == CIFS_CACHE_READ_FLG && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RH_FLG && + (epoch - cinode->epoch > 1)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RHW_FLG && 
+ (epoch - cinode->epoch > 1)) + *purge_cache = true; + else if (cinode->oplock == 0 && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + } else if (old_oplock == CIFS_CACHE_RH_FLG) { + if (cinode->oplock == CIFS_CACHE_RH_FLG && + (epoch - cinode->epoch > 0)) + *purge_cache = true; + else if (cinode->oplock == CIFS_CACHE_RHW_FLG && + (epoch - cinode->epoch > 1)) + *purge_cache = true; + } + cinode->epoch = epoch; + } +} + +static bool +smb2_is_read_op(__u32 oplock) +{ + return oplock == SMB2_OPLOCK_LEVEL_II; +} + +static bool +smb21_is_read_op(__u32 oplock) +{ + return (oplock & SMB2_LEASE_READ_CACHING_HE) && + !(oplock & SMB2_LEASE_WRITE_CACHING_HE); +} + +static __le32 +map_oplock_to_lease(u8 oplock) +{ + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) + return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_II) + return SMB2_LEASE_READ_CACHING; + else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) + return SMB2_LEASE_HANDLE_CACHING | SMB2_LEASE_READ_CACHING | + SMB2_LEASE_WRITE_CACHING; + return 0; +} + +static char * +smb2_create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease *buf; + + buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL); + if (!buf) + return NULL; + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + buf->lcontext.LeaseState = map_oplock_to_lease(oplock); + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return (char *)buf; +} + +static char * +smb3_create_lease_buf(u8 *lease_key, u8 oplock) +{ + struct create_lease_v2 *buf; + + buf = kzalloc(sizeof(struct create_lease_v2), GFP_KERNEL); + 
if (!buf) + return NULL; + + buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); + buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + buf->lcontext.LeaseState = map_oplock_to_lease(oplock); + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_lease_v2, lcontext)); + buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2)); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_lease_v2, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'R'; + buf->Name[1] = 'q'; + buf->Name[2] = 'L'; + buf->Name[3] = 's'; + return (char *)buf; +} + +static __u8 +smb2_parse_lease_buf(void *buf, unsigned int *epoch) +{ + struct create_lease *lc = (struct create_lease *)buf; + + *epoch = 0; /* not used */ + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + return le32_to_cpu(lc->lcontext.LeaseState); +} + +static __u8 +smb3_parse_lease_buf(void *buf, unsigned int *epoch) +{ + struct create_lease_v2 *lc = (struct create_lease_v2 *)buf; + + *epoch = le16_to_cpu(lc->lcontext.Epoch); + if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) + return SMB2_OPLOCK_LEVEL_NOCHANGE; + return le32_to_cpu(lc->lcontext.LeaseState); +} + +struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, .setup_async_request = smb2_setup_async_request, @@ -638,6 +880,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -660,8 +903,82 @@ struct smb_version_operations smb21_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .calc_signature = smb2_calc_signature, + .is_read_op = smb2_is_read_op, + .set_oplock_level = smb2_set_oplock_level, + 
.create_lease_buf = smb2_create_lease_buf, + .parse_lease_buf = smb2_parse_lease_buf, }; +struct smb_version_operations smb21_operations = { + .compare_fids = smb2_compare_fids, + .setup_request = smb2_setup_request, + .setup_async_request = smb2_setup_async_request, + .check_receive = smb2_check_receive, + .add_credits = smb2_add_credits, + .set_credits = smb2_set_credits, + .get_credits_field = smb2_get_credits_field, + .get_credits = smb2_get_credits, + .get_next_mid = smb2_get_next_mid, + .read_data_offset = smb2_read_data_offset, + .read_data_length = smb2_read_data_length, + .map_error = map_smb2_to_linux_error, + .find_mid = smb2_find_mid, + .check_message = smb2_check_message, + .dump_detail = smb2_dump_detail, + .clear_stats = smb2_clear_stats, + .print_stats = smb2_print_stats, + .is_oplock_break = smb2_is_valid_oplock_break, + .need_neg = smb2_need_neg, + .negotiate = smb2_negotiate, + .negotiate_wsize = smb2_negotiate_wsize, + .negotiate_rsize = smb2_negotiate_rsize, + .sess_setup = SMB2_sess_setup, + .logoff = SMB2_logoff, + .tree_connect = SMB2_tcon, + .tree_disconnect = SMB2_tdis, + .is_path_accessible = smb2_is_path_accessible, + .can_echo = smb2_can_echo, + .echo = SMB2_echo, + .query_path_info = smb2_query_path_info, + .get_srv_inum = smb2_get_srv_inum, + .query_file_info = smb2_query_file_info, + .set_path_size = smb2_set_path_size, + .set_file_size = smb2_set_file_size, + .set_file_info = smb2_set_file_info, + .mkdir = smb2_mkdir, + .mkdir_setinfo = smb2_mkdir_setinfo, + .rmdir = smb2_rmdir, + .unlink = smb2_unlink, + .rename = smb2_rename_path, + .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, + .open = smb2_open_file, + .set_fid = smb2_set_fid, + .close = smb2_close_file, + .flush = smb2_flush_file, + .async_readv = smb2_async_readv, + .async_writev = smb2_async_writev, + .sync_read = smb2_sync_read, + .sync_write = smb2_sync_write, + .query_dir_first = smb2_query_dir_first, + .query_dir_next = 
smb2_query_dir_next, + .close_dir = smb2_close_dir, + .calc_smb_size = smb2_calc_size, + .is_status_pending = smb2_is_status_pending, + .oplock_response = smb2_oplock_response, + .queryfs = smb2_queryfs, + .mand_lock = smb2_mand_lock, + .mand_unlock_range = smb2_unlock_range, + .push_mand_locks = smb2_push_mandatory_locks, + .get_lease_key = smb2_get_lease_key, + .set_lease_key = smb2_set_lease_key, + .new_lease_key = smb2_new_lease_key, + .calc_signature = smb2_calc_signature, + .is_read_op = smb21_is_read_op, + .set_oplock_level = smb21_set_oplock_level, + .create_lease_buf = smb2_create_lease_buf, + .parse_lease_buf = smb2_parse_lease_buf, +}; struct smb_version_operations smb30_operations = { .compare_fids = smb2_compare_fids, @@ -706,6 +1023,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, .open = smb2_open_file, .set_fid = smb2_set_fid, .close = smb2_close_file, @@ -729,6 +1047,10 @@ struct smb_version_operations smb30_operations = { .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb3signingkey, .calc_signature = smb3_calc_signature, + .is_read_op = smb21_is_read_op, + .set_oplock_level = smb3_set_oplock_level, + .create_lease_buf = smb3_create_lease_buf, + .parse_lease_buf = smb3_parse_lease_buf, }; struct smb_version_values smb20_values = { @@ -746,9 +1068,9 @@ struct smb_version_values smb20_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease), }; struct smb_version_values smb21_values = { @@ -766,9 +1088,9 @@ struct smb_version_values smb21_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - 
.oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease), }; struct smb_version_values smb30_values = { @@ -786,9 +1108,9 @@ struct smb_version_values smb30_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), }; struct smb_version_values smb302_values = { @@ -806,7 +1128,7 @@ struct smb_version_values smb302_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), }; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index abc9c2809b51..eba0efde66d7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -478,12 +478,20 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, } /* + * If we are here due to reconnect, free per-smb session key + * in case signing was required. + */ + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + + /* * If memory allocation is successful, caller of this function * frees it. */ ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); if (!ses->ntlmssp) return -ENOMEM; + ses->ntlmssp->sesskey_per_smbsess = true; /* FIXME: allow for other auth types besides NTLMSSP (e.g. 
krb5) */ ses->sectype = RawNTLMSSP; @@ -628,6 +636,40 @@ ssetup_exit: /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ if ((phase == NtLmChallenge) && (rc == 0)) goto ssetup_ntlmssp_authenticate; + + if (!rc) { + mutex_lock(&server->srv_mutex); + if (server->sign && server->ops->generate_signingkey) { + rc = server->ops->generate_signingkey(ses); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + if (rc) { + cifs_dbg(FYI, + "SMB3 session key generation failed\n"); + mutex_unlock(&server->srv_mutex); + goto keygen_exit; + } + } + if (!server->session_estab) { + server->sequence_number = 0x2; + server->session_estab = true; + } + mutex_unlock(&server->srv_mutex); + + cifs_dbg(FYI, "SMB2/3 session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + } + +keygen_exit: + if (!server->sign) { + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + } + kfree(ses->ntlmssp); + return rc; } @@ -813,39 +855,6 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) return rc; } -static struct create_lease * -create_lease_buf(u8 *lease_key, u8 oplock) -{ - struct create_lease *buf; - - buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL); - if (!buf) - return NULL; - - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); - if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) - buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING | - SMB2_LEASE_READ_CACHING; - else if (oplock == SMB2_OPLOCK_LEVEL_II) - buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING; - else if (oplock == SMB2_OPLOCK_LEVEL_BATCH) - buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING | - SMB2_LEASE_READ_CACHING | - SMB2_LEASE_WRITE_CACHING; - - buf->ccontext.DataOffset = cpu_to_le16(offsetof - (struct create_lease, lcontext)); - buf->ccontext.DataLength = cpu_to_le32(sizeof(struct 
lease_context)); - buf->ccontext.NameOffset = cpu_to_le16(offsetof - (struct create_lease, Name)); - buf->ccontext.NameLength = cpu_to_le16(4); - buf->Name[0] = 'R'; - buf->Name[1] = 'q'; - buf->Name[2] = 'L'; - buf->Name[3] = 's'; - return buf; -} static struct create_durable * create_durable_buf(void) @@ -894,55 +903,49 @@ create_reconnect_durable_buf(struct cifs_fid *fid) } static __u8 -parse_lease_state(struct smb2_create_rsp *rsp) +parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, + unsigned int *epoch) { char *data_offset; - struct create_lease *lc; - bool found = false; + struct create_context *cc; unsigned int next = 0; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); - lc = (struct create_lease *)data_offset; + cc = (struct create_context *)data_offset; do { - lc = (struct create_lease *)((char *)lc + next); - name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; - if (le16_to_cpu(lc->ccontext.NameLength) != 4 || + cc = (struct create_context *)((char *)cc + next); + name = le16_to_cpu(cc->NameOffset) + (char *)cc; + if (le16_to_cpu(cc->NameLength) != 4 || strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(lc->ccontext.Next); + next = le32_to_cpu(cc->Next); continue; } - if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) - return SMB2_OPLOCK_LEVEL_NOCHANGE; - found = true; - break; + return server->ops->parse_lease_buf(cc, epoch); } while (next != 0); - if (!found) - return 0; - - return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); + return 0; } static int -add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock) +add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, + unsigned int *num_iovec, __u8 *oplock) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = create_lease_buf(oplock+1, *oplock); + iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock); if (iov[num].iov_base == 
NULL) return -ENOMEM; - iov[num].iov_len = sizeof(struct create_lease); + iov[num].iov_len = server->vals->create_lease_size; req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; if (!req->CreateContextsOffset) req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) - 4 + iov[num - 1].iov_len); - req->CreateContextsLength = cpu_to_le32( - le32_to_cpu(req->CreateContextsLength) + - sizeof(struct create_lease)); - inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); + le32_add_cpu(&req->CreateContextsLength, + server->vals->create_lease_size); + inc_rfc1001_len(&req->hdr, server->vals->create_lease_size); *num_iovec = num + 1; return 0; } @@ -967,9 +970,7 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, req->CreateContextsOffset = cpu_to_le32(sizeof(struct smb2_create_req) - 4 + iov[1].iov_len); - req->CreateContextsLength = - cpu_to_le32(le32_to_cpu(req->CreateContextsLength) + - sizeof(struct create_durable)); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable)); inc_rfc1001_len(&req->hdr, sizeof(struct create_durable)); *num_iovec = num + 1; return 0; @@ -977,7 +978,8 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, - __u8 *oplock, struct smb2_file_all_info *buf) + __u8 *oplock, struct smb2_file_all_info *buf, + struct smb2_err_rsp **err_buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; @@ -1048,11 +1050,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (!server->oplocks) *oplock = SMB2_OPLOCK_LEVEL_NONE; - if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) || + if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) || *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - rc = add_lease_context(iov, &num_iovecs, oplock); + rc = add_lease_context(server, iov, &num_iovecs, oplock); if (rc) { 
cifs_small_buf_release(req); kfree(copy_path); @@ -1062,11 +1064,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { /* need to set Next field of lease context if we request it */ - if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) { + if (server->capabilities & SMB2_GLOBAL_CAP_LEASING) { struct create_context *ccontext = (struct create_context *)iov[num_iovecs-1].iov_base; ccontext->Next = - cpu_to_le32(sizeof(struct create_lease)); + cpu_to_le32(server->vals->create_lease_size); } rc = add_durable_context(iov, &num_iovecs, oparms); if (rc) { @@ -1082,6 +1084,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); + if (err_buf) + *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, + GFP_KERNEL); goto creat_exit; } @@ -1098,7 +1103,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) - *oplock = parse_lease_state(rsp); + *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch); else *oplock = rsp->OplockLevel; creat_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 36b0d37ea69b..b83d0118a757 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -150,6 +150,20 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +struct smb2_symlink_err_rsp { + __le32 SymLinkLength; + __le32 SymLinkErrorTag; + __le32 ReparseTag; + __le16 ReparseDataLength; + __le16 UnparsedPathLength; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; +} __packed; + #define SMB2_CLIENT_GUID_SIZE 16 extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; @@ -462,6 +476,10 @@ struct create_context { __u8 Buffer[0]; } __packed; +#define SMB2_LEASE_READ_CACHING_HE 0x01 +#define 
SMB2_LEASE_HANDLE_CACHING_HE 0x02 +#define SMB2_LEASE_WRITE_CACHING_HE 0x04 + #define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00) #define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01) #define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02) @@ -479,12 +497,31 @@ struct lease_context { __le64 LeaseDuration; } __packed; +struct lease_context_v2 { + __le64 LeaseKeyLow; + __le64 LeaseKeyHigh; + __le32 LeaseState; + __le32 LeaseFlags; + __le64 LeaseDuration; + __le64 ParentLeaseKeyLow; + __le64 ParentLeaseKeyHigh; + __le16 Epoch; + __le16 Reserved; +} __packed; + struct create_lease { struct create_context ccontext; __u8 Name[8]; struct lease_context lcontext; } __packed; +struct create_lease_v2 { + struct create_context ccontext; + __u8 Name[8]; + struct lease_context_v2 lcontext; + __u8 Pad[4]; +} __packed; + struct create_durable { struct create_context ccontext; __u8 Name[8]; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 1a5ecbed40ed..e3fb4801ee96 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -53,7 +53,6 @@ extern int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); -extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); extern bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv); @@ -87,7 +86,6 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, FILE_ALL_INFO *buf); -extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); @@ -106,7 +104,8 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, extern int 
SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, - struct smb2_file_all_info *buf); + struct smb2_file_all_info *buf, + struct smb2_err_rsp **err_buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 4f2300d020c7..340abca3aa52 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -114,6 +114,23 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) return 0; } +static struct cifs_ses * +smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server) +{ + struct cifs_ses *ses; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != smb2hdr->SessionId) + continue; + spin_unlock(&cifs_tcp_ses_lock); + return ses; + } + spin_unlock(&cifs_tcp_ses_lock); + + return NULL; +} + int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) @@ -124,6 +141,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + struct cifs_ses *ses; + + ses = smb2_find_smb_ses(smb2_pdu, server); + if (!ses) { + cifs_dbg(VFS, "%s: Could not find session\n", __func__); + return 0; + } memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); @@ -135,7 +159,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } rc = crypto_shash_setkey(server->secmech.hmacsha256, - server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; @@ 
-198,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; } -void -generate_smb3signingkey(struct TCP_Server_Info *server) +int +generate_smb3signingkey(struct cifs_ses *ses) { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; @@ -209,90 +233,99 @@ generate_smb3signingkey(struct TCP_Server_Info *server) unsigned char *hashptr = prfhash; memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); - memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); + memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); - rc = smb3_crypto_shash_allocate(server); + rc = smb3_crypto_shash_allocate(ses->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_setkey(server->secmech.hmacsha256, - server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(ses->server->secmech.hmacsha256, + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + rc = crypto_shash_init(&ses->server->secmech.sdeschmacsha256->shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, i, 4); if (rc) { cifs_dbg(VFS, "%s: Could not update with n\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, "SMB2AESCMAC", 12); if (rc) { cifs_dbg(VFS, "%s: Could not update with label\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, &zero, 1); if (rc) { cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); 
goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, "SmbSign", 8); if (rc) { cifs_dbg(VFS, "%s: Could not update with context\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash, L, 4); if (rc) { cifs_dbg(VFS, "%s: Could not update with L\n", __func__); goto smb3signkey_ret; } - rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, + rc = crypto_shash_final(&ses->server->secmech.sdeschmacsha256->shash, hashptr); if (rc) { cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); goto smb3signkey_ret; } - memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); + memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); smb3signkey_ret: - return; + return rc; } int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int i, rc; + int i; + int rc = 0; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; + struct cifs_ses *ses; + + ses = smb2_find_smb_ses(smb2_pdu, server); + if (!ses) { + cifs_dbg(VFS, "%s: Could not find session\n", __func__); + return 0; + } memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); rc = crypto_shash_setkey(server->secmech.cmacaes, - server->smb3signingkey, SMB2_CMACAES_SIZE); + ses->smb3signingkey, SMB2_CMACAES_SIZE); + if (rc) { cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); return rc; @@ -389,6 +422,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base; if ((smb2_pdu->Command == SMB2_NEGOTIATE) || + (smb2_pdu->Command == 
SMB2_SESSION_SETUP) || (smb2_pdu->Command == SMB2_OPLOCK_BREAK) || (!server->session_estab)) return 0; diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c new file mode 100644 index 000000000000..1506d4fddb2c --- /dev/null +++ b/fs/cifs/winucase.c @@ -0,0 +1,663 @@ +/* + * fs/cifs/winucase.c + * + * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * The const tables in this file were converted from the following info + * provided by Microsoft: + * + * 3.1.5.3 Mapping UTF-16 Strings to Upper Case: + * + * http://msdn.microsoft.com/en-us/library/hh877830.aspx + * http://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=10921 + * + * In particular, the table in "Windows 8 Upper Case Mapping Table.txt" was + * post-processed using the winucase_convert.pl script. 
+ */ + +#include <linux/nls.h> + +wchar_t cifs_toupper(wchar_t in); /* quiet sparse */ + +static const wchar_t t2_00[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x00c0, 0x00c1, 0x00c2, 
0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178, +}; + +static const wchar_t t2_01[256] = { + 0x0000, 0x0100, 0x0000, 0x0102, 0x0000, 0x0104, 0x0000, 0x0106, + 0x0000, 0x0108, 0x0000, 0x010a, 0x0000, 0x010c, 0x0000, 0x010e, + 0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116, + 0x0000, 0x0118, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e, + 0x0000, 0x0120, 0x0000, 0x0122, 0x0000, 0x0124, 0x0000, 0x0126, + 0x0000, 0x0128, 0x0000, 0x012a, 0x0000, 0x012c, 0x0000, 0x012e, + 0x0000, 0x0000, 0x0000, 0x0132, 0x0000, 0x0134, 0x0000, 0x0136, + 0x0000, 0x0000, 0x0139, 0x0000, 0x013b, 0x0000, 0x013d, 0x0000, + 0x013f, 0x0000, 0x0141, 0x0000, 0x0143, 0x0000, 0x0145, 0x0000, + 0x0147, 0x0000, 0x0000, 0x014a, 0x0000, 0x014c, 0x0000, 0x014e, + 0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156, + 0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e, + 0x0000, 0x0160, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166, + 0x0000, 0x0168, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e, + 0x0000, 0x0170, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176, + 0x0000, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000, + 0x0243, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0000, 0x0000, + 0x0187, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x01f6, 0x0000, 0x0000, + 0x0000, 0x0198, 0x023d, 0x0000, 0x0000, 0x0000, 0x0220, 0x0000, + 0x0000, 0x01a0, 0x0000, 0x01a2, 0x0000, 0x01a4, 0x0000, 0x0000, + 0x01a7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ac, 0x0000, 0x0000, + 0x01af, 0x0000, 0x0000, 0x0000, 0x01b3, 0x0000, 0x01b5, 0x0000, + 0x0000, 0x01b8, 0x0000, 0x0000, 0x0000, 0x01bc, 0x0000, 0x01f7, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c4, 0x0000, + 0x0000, 0x01c7, 0x0000, 0x0000, 0x01ca, 0x0000, 0x01cd, 0x0000, + 
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000, + 0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x018e, 0x0000, 0x01de, + 0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6, + 0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee, + 0x0000, 0x0000, 0x0000, 0x01f1, 0x0000, 0x01f4, 0x0000, 0x0000, + 0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x01fe, +}; + +static const wchar_t t2_02[256] = { + 0x0000, 0x0200, 0x0000, 0x0202, 0x0000, 0x0204, 0x0000, 0x0206, + 0x0000, 0x0208, 0x0000, 0x020a, 0x0000, 0x020c, 0x0000, 0x020e, + 0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0216, + 0x0000, 0x0218, 0x0000, 0x021a, 0x0000, 0x021c, 0x0000, 0x021e, + 0x0000, 0x0000, 0x0000, 0x0222, 0x0000, 0x0224, 0x0000, 0x0226, + 0x0000, 0x0228, 0x0000, 0x022a, 0x0000, 0x022c, 0x0000, 0x022e, + 0x0000, 0x0230, 0x0000, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x023b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0241, 0x0000, 0x0000, 0x0000, 0x0000, 0x0246, + 0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x0000, 0x024e, + 0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, + 0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c, + 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000, + 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000, + 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_03[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0370, 0x0000, 0x0372, 0x0000, 0x0000, 0x0000, 0x0376, + 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0386, 0x0388, 0x0389, 0x038a, + 0x0000, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, + 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03cf, + 0x0000, 0x03d8, 0x0000, 0x03da, 0x0000, 0x03dc, 0x0000, 0x03de, + 0x0000, 0x03e0, 0x0000, 0x03e2, 0x0000, 0x03e4, 0x0000, 0x03e6, + 0x0000, 0x03e8, 0x0000, 0x03ea, 0x0000, 0x03ec, 0x0000, 0x03ee, + 0x0000, 0x0000, 0x03f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x03f7, 0x0000, 0x0000, 0x03fa, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_04[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, + 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, + 0x0000, 0x0460, 0x0000, 0x0462, 0x0000, 0x0464, 0x0000, 0x0466, + 0x0000, 0x0468, 0x0000, 0x046a, 0x0000, 0x046c, 0x0000, 0x046e, + 0x0000, 0x0470, 0x0000, 0x0472, 0x0000, 0x0474, 0x0000, 0x0476, + 0x0000, 0x0478, 0x0000, 0x047a, 0x0000, 0x047c, 0x0000, 0x047e, + 0x0000, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x048a, 0x0000, 0x048c, 0x0000, 0x048e, + 0x0000, 0x0490, 0x0000, 0x0492, 0x0000, 0x0494, 0x0000, 
0x0496, + 0x0000, 0x0498, 0x0000, 0x049a, 0x0000, 0x049c, 0x0000, 0x049e, + 0x0000, 0x04a0, 0x0000, 0x04a2, 0x0000, 0x04a4, 0x0000, 0x04a6, + 0x0000, 0x04a8, 0x0000, 0x04aa, 0x0000, 0x04ac, 0x0000, 0x04ae, + 0x0000, 0x04b0, 0x0000, 0x04b2, 0x0000, 0x04b4, 0x0000, 0x04b6, + 0x0000, 0x04b8, 0x0000, 0x04ba, 0x0000, 0x04bc, 0x0000, 0x04be, + 0x0000, 0x0000, 0x04c1, 0x0000, 0x04c3, 0x0000, 0x04c5, 0x0000, + 0x04c7, 0x0000, 0x04c9, 0x0000, 0x04cb, 0x0000, 0x04cd, 0x04c0, + 0x0000, 0x04d0, 0x0000, 0x04d2, 0x0000, 0x04d4, 0x0000, 0x04d6, + 0x0000, 0x04d8, 0x0000, 0x04da, 0x0000, 0x04dc, 0x0000, 0x04de, + 0x0000, 0x04e0, 0x0000, 0x04e2, 0x0000, 0x04e4, 0x0000, 0x04e6, + 0x0000, 0x04e8, 0x0000, 0x04ea, 0x0000, 0x04ec, 0x0000, 0x04ee, + 0x0000, 0x04f0, 0x0000, 0x04f2, 0x0000, 0x04f4, 0x0000, 0x04f6, + 0x0000, 0x04f8, 0x0000, 0x04fa, 0x0000, 0x04fc, 0x0000, 0x04fe, +}; + +static const wchar_t t2_05[256] = { + 0x0000, 0x0500, 0x0000, 0x0502, 0x0000, 0x0504, 0x0000, 0x0506, + 0x0000, 0x0508, 0x0000, 0x050a, 0x0000, 0x050c, 0x0000, 0x050e, + 0x0000, 0x0510, 0x0000, 0x0512, 0x0000, 0x0514, 0x0000, 0x0516, + 0x0000, 0x0518, 0x0000, 0x051a, 0x0000, 0x051c, 0x0000, 0x051e, + 0x0000, 0x0520, 0x0000, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, + 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, + 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, + 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, + 0x0550, 0x0551, 0x0552, 0x0553, 
0x0554, 0x0555, 0x0556, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_1d[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_1e[256] = { + 0x0000, 0x1e00, 0x0000, 0x1e02, 0x0000, 0x1e04, 0x0000, 0x1e06, + 0x0000, 0x1e08, 0x0000, 0x1e0a, 0x0000, 0x1e0c, 0x0000, 0x1e0e, + 0x0000, 0x1e10, 0x0000, 0x1e12, 0x0000, 0x1e14, 0x0000, 0x1e16, + 0x0000, 0x1e18, 0x0000, 0x1e1a, 0x0000, 0x1e1c, 0x0000, 0x1e1e, + 0x0000, 0x1e20, 0x0000, 0x1e22, 0x0000, 0x1e24, 0x0000, 0x1e26, + 0x0000, 0x1e28, 0x0000, 0x1e2a, 0x0000, 0x1e2c, 0x0000, 0x1e2e, + 0x0000, 0x1e30, 0x0000, 0x1e32, 0x0000, 0x1e34, 0x0000, 0x1e36, + 0x0000, 0x1e38, 0x0000, 0x1e3a, 0x0000, 0x1e3c, 0x0000, 0x1e3e, + 0x0000, 0x1e40, 0x0000, 0x1e42, 0x0000, 0x1e44, 0x0000, 0x1e46, + 0x0000, 0x1e48, 0x0000, 0x1e4a, 0x0000, 0x1e4c, 0x0000, 0x1e4e, + 0x0000, 0x1e50, 0x0000, 0x1e52, 0x0000, 0x1e54, 0x0000, 0x1e56, + 0x0000, 0x1e58, 0x0000, 0x1e5a, 0x0000, 0x1e5c, 
0x0000, 0x1e5e, + 0x0000, 0x1e60, 0x0000, 0x1e62, 0x0000, 0x1e64, 0x0000, 0x1e66, + 0x0000, 0x1e68, 0x0000, 0x1e6a, 0x0000, 0x1e6c, 0x0000, 0x1e6e, + 0x0000, 0x1e70, 0x0000, 0x1e72, 0x0000, 0x1e74, 0x0000, 0x1e76, + 0x0000, 0x1e78, 0x0000, 0x1e7a, 0x0000, 0x1e7c, 0x0000, 0x1e7e, + 0x0000, 0x1e80, 0x0000, 0x1e82, 0x0000, 0x1e84, 0x0000, 0x1e86, + 0x0000, 0x1e88, 0x0000, 0x1e8a, 0x0000, 0x1e8c, 0x0000, 0x1e8e, + 0x0000, 0x1e90, 0x0000, 0x1e92, 0x0000, 0x1e94, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x1ea0, 0x0000, 0x1ea2, 0x0000, 0x1ea4, 0x0000, 0x1ea6, + 0x0000, 0x1ea8, 0x0000, 0x1eaa, 0x0000, 0x1eac, 0x0000, 0x1eae, + 0x0000, 0x1eb0, 0x0000, 0x1eb2, 0x0000, 0x1eb4, 0x0000, 0x1eb6, + 0x0000, 0x1eb8, 0x0000, 0x1eba, 0x0000, 0x1ebc, 0x0000, 0x1ebe, + 0x0000, 0x1ec0, 0x0000, 0x1ec2, 0x0000, 0x1ec4, 0x0000, 0x1ec6, + 0x0000, 0x1ec8, 0x0000, 0x1eca, 0x0000, 0x1ecc, 0x0000, 0x1ece, + 0x0000, 0x1ed0, 0x0000, 0x1ed2, 0x0000, 0x1ed4, 0x0000, 0x1ed6, + 0x0000, 0x1ed8, 0x0000, 0x1eda, 0x0000, 0x1edc, 0x0000, 0x1ede, + 0x0000, 0x1ee0, 0x0000, 0x1ee2, 0x0000, 0x1ee4, 0x0000, 0x1ee6, + 0x0000, 0x1ee8, 0x0000, 0x1eea, 0x0000, 0x1eec, 0x0000, 0x1eee, + 0x0000, 0x1ef0, 0x0000, 0x1ef2, 0x0000, 0x1ef4, 0x0000, 0x1ef6, + 0x0000, 0x1ef8, 0x0000, 0x1efa, 0x0000, 0x1efc, 0x0000, 0x1efe, +}; + +static const wchar_t t2_1f[256] = { + 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x1f59, 0x0000, 0x1f5b, 0x0000, 0x1f5d, 0x0000, 0x1f5f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, + 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, + 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fb8, 0x1fb9, 0x0000, 0x1fbc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x1fcc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fd8, 0x1fd9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x1fe8, 0x1fe9, 0x0000, 0x0000, 0x0000, 0x1fec, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x1ffc, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_21[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, + 0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f, + 0x0000, 0x0000, 0x0000, 0x0000, 0x2183, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_24[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, 0x24bb, 0x24bc, 0x24bd, + 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, 0x24c3, 0x24c4, 0x24c5, + 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, 0x24cb, 0x24cc, 0x24cd, + 0x24ce, 0x24cf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_2c[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, + 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, + 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, + 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, + 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, + 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, + 0x0000, 0x2c60, 0x0000, 0x0000, 0x0000, 0x023a, 0x023e, 0x0000, + 0x2c67, 0x0000, 0x2c69, 0x0000, 0x2c6b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x2c72, 0x0000, 0x0000, 0x2c75, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x2c80, 0x0000, 0x2c82, 0x0000, 0x2c84, 0x0000, 0x2c86, + 0x0000, 0x2c88, 0x0000, 0x2c8a, 0x0000, 0x2c8c, 0x0000, 0x2c8e, + 0x0000, 0x2c90, 0x0000, 0x2c92, 0x0000, 0x2c94, 0x0000, 0x2c96, + 0x0000, 0x2c98, 0x0000, 0x2c9a, 0x0000, 0x2c9c, 0x0000, 0x2c9e, + 0x0000, 0x2ca0, 0x0000, 0x2ca2, 0x0000, 0x2ca4, 0x0000, 0x2ca6, + 0x0000, 0x2ca8, 0x0000, 0x2caa, 0x0000, 0x2cac, 0x0000, 0x2cae, + 0x0000, 0x2cb0, 0x0000, 0x2cb2, 0x0000, 0x2cb4, 0x0000, 0x2cb6, + 0x0000, 0x2cb8, 0x0000, 0x2cba, 0x0000, 0x2cbc, 0x0000, 0x2cbe, + 0x0000, 0x2cc0, 0x0000, 0x2cc2, 0x0000, 0x2cc4, 0x0000, 0x2cc6, + 0x0000, 0x2cc8, 0x0000, 0x2cca, 0x0000, 0x2ccc, 0x0000, 0x2cce, + 0x0000, 0x2cd0, 0x0000, 0x2cd2, 0x0000, 0x2cd4, 0x0000, 0x2cd6, + 0x0000, 0x2cd8, 0x0000, 0x2cda, 0x0000, 0x2cdc, 0x0000, 0x2cde, + 0x0000, 0x2ce0, 0x0000, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t 
t2_2d[256] = { + 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, + 0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, + 0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, + 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, + 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_a6[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa640, 0x0000, 0xa642, 0x0000, 0xa644, 0x0000, 0xa646, + 0x0000, 0xa648, 0x0000, 0xa64a, 0x0000, 0xa64c, 0x0000, 0xa64e, + 0x0000, 0xa650, 0x0000, 0xa652, 0x0000, 0xa654, 0x0000, 0xa656, + 0x0000, 0xa658, 0x0000, 0xa65a, 0x0000, 0xa65c, 0x0000, 0xa65e, + 0x0000, 0x0000, 0x0000, 0xa662, 0x0000, 0xa664, 0x0000, 0xa666, + 0x0000, 0xa668, 0x0000, 0xa66a, 0x0000, 0xa66c, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa680, 0x0000, 0xa682, 0x0000, 0xa684, 0x0000, 0xa686, + 0x0000, 0xa688, 0x0000, 0xa68a, 0x0000, 0xa68c, 0x0000, 0xa68e, + 0x0000, 0xa690, 0x0000, 0xa692, 0x0000, 0xa694, 0x0000, 0xa696, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_a7[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0xa722, 0x0000, 0xa724, 0x0000, 0xa726, + 0x0000, 0xa728, 0x0000, 0xa72a, 0x0000, 0xa72c, 0x0000, 0xa72e, + 0x0000, 0x0000, 0x0000, 0xa732, 0x0000, 0xa734, 0x0000, 0xa736, + 0x0000, 0xa738, 0x0000, 0xa73a, 0x0000, 0xa73c, 0x0000, 0xa73e, + 0x0000, 0xa740, 0x0000, 0xa742, 0x0000, 0xa744, 0x0000, 0xa746, + 0x0000, 0xa748, 0x0000, 0xa74a, 0x0000, 0xa74c, 0x0000, 0xa74e, + 0x0000, 0xa750, 0x0000, 0xa752, 0x0000, 0xa754, 0x0000, 0xa756, + 0x0000, 0xa758, 0x0000, 0xa75a, 0x0000, 0xa75c, 0x0000, 0xa75e, + 0x0000, 0xa760, 0x0000, 0xa762, 0x0000, 0xa764, 0x0000, 0xa766, + 0x0000, 0xa768, 0x0000, 0xa76a, 0x0000, 0xa76c, 0x0000, 0xa76e, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0xa779, 0x0000, 0xa77b, 0x0000, 0x0000, 0xa77e, + 0x0000, 0xa780, 0x0000, 0xa782, 0x0000, 0xa784, 0x0000, 0xa786, + 0x0000, 0x0000, 0x0000, 0x0000, 0xa78b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t t2_ff[256] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, + 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, + 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, + 0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +}; + +static const wchar_t *const toplevel[256] = { + t2_00, t2_01, t2_02, t2_03, t2_04, t2_05, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, t2_1d, t2_1e, t2_1f, + NULL, t2_21, NULL, NULL, t2_24, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, t2_2c, t2_2d, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, t2_a6, t2_a7, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, t2_ff, +}; + +/** + * cifs_toupper - convert a wchar_t from lower to uppercase + * @in: character to convert from lower to uppercase + * + * This function consults the static tables above to convert a wchar_t from + * lower to uppercase. In the event that there is no mapping, the original + * "in" character is returned. + */ +wchar_t +cifs_toupper(wchar_t in) +{ + unsigned char idx; + const wchar_t *tbl; + wchar_t out; + + /* grab upper byte */ + idx = (in & 0xff00) >> 8; + + /* find pointer to 2nd layer table */ + tbl = toplevel[idx]; + if (!tbl) + return in; + + /* grab lower byte */ + idx = in & 0xff; + + /* look up character in table */ + out = tbl[idx]; + if (out) + return out; + + return in; +} diff --git a/fs/dcache.c b/fs/dcache.c index 4d9df3c940e6..41000305d716 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,6 +37,7 @@ #include <linux/rculist_bl.h> #include <linux/prefetch.h> #include <linux/ratelimit.h> +#include <linux/list_lru.h> #include "internal.h" #include "mount.h" @@ -48,7 +49,7 @@ * - the dcache hash table * s_anon bl list spinlock protects: * - the s_anon list (see __d_drop) - * dcache_lru_lock protects: + * dentry->d_sb->s_dentry_lru_lock protects: * - the dcache lru lists and counters * d_lock protects: * - d_flags @@ -63,7 +64,7 @@ * Ordering: * dentry->d_inode->i_lock * dentry->d_lock - * dcache_lru_lock + * dentry->d_sb->s_dentry_lru_lock * dcache_hash_bucket lock * s_anon lock * @@ -81,7 +82,6 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); @@ -90,8 +90,8 @@ static struct 
kmem_cache *dentry_cache __read_mostly; /** * read_seqbegin_or_lock - begin a sequence number check or locking block - * lock: sequence lock - * seq : sequence number to be checked + * @lock: sequence lock + * @seq : sequence number to be checked * * First try it once optimistically without taking the lock. If that fails, * take the lock. The sequence number is also used as a marker for deciding @@ -103,7 +103,7 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ - write_seqlock(lock); + read_seqlock_excl(lock); } static inline int need_seqretry(seqlock_t *lock, int seq) @@ -114,7 +114,7 @@ static inline int need_seqretry(seqlock_t *lock, int seq) static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) - write_sequnlock(lock); + read_sequnlock_excl(lock); } /* @@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = { .age_limit = 45, }; -static DEFINE_PER_CPU(unsigned int, nr_dentry); +static DEFINE_PER_CPU(long, nr_dentry); +static DEFINE_PER_CPU(long, nr_dentry_unused); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -static int get_nr_dentry(void) + +/* + * Here we resort to our own counters instead of using generic per-cpu counters + * for consistency with what the vfs inode code does. We are expected to harvest + * better code and performance by having our own specialized counters. + * + * Please note that the loop is done over all possible CPUs, not over all online + * CPUs. The reason for this is that we don't want to play games with CPUs going + * on and off. If one of them goes off, we will just keep their counters. + * + * glommer: See cffbc8a for details, and if you ever intend to change this, + * please update all vfs counters to match. + */ +static long get_nr_dentry(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_dentry, i); return sum < 0 ? 
0 : sum; } +static long get_nr_dentry_unused(void) +{ + int i; + long sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_dentry_unused, i); + return sum < 0 ? 0 : sum; +} + int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); - return proc_dointvec(table, write, buffer, lenp, ppos); + dentry_stat.nr_unused = get_nr_dentry_unused(); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -333,52 +357,96 @@ static void dentry_unlink_inode(struct dentry * dentry) } /* - * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. + * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry + * is in use - which includes both the "real" per-superblock + * LRU list _and_ the DCACHE_SHRINK_LIST use. + * + * The DCACHE_SHRINK_LIST bit is set whenever the dentry is + * on the shrink list (ie not on the superblock LRU list). + * + * The per-cpu "nr_dentry_unused" counters are updated with + * the DCACHE_LRU_LIST bit. + * + * These helper functions make sure we always follow the + * rules. d_lock must be held by the caller. 
*/ -static void dentry_lru_add(struct dentry *dentry) +#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x)) +static void d_lru_add(struct dentry *dentry) { - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) { - spin_lock(&dcache_lru_lock); - dentry->d_flags |= DCACHE_LRU_LIST; - list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; - spin_unlock(&dcache_lru_lock); - } + D_FLAG_VERIFY(dentry, 0); + dentry->d_flags |= DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); } -static void __dentry_lru_del(struct dentry *dentry) +static void d_lru_del(struct dentry *dentry) { + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); +} + +static void d_shrink_del(struct dentry *dentry) +{ + D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); list_del_init(&dentry->d_lru); dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + this_cpu_dec(nr_dentry_unused); +} + +static void d_shrink_add(struct dentry *dentry, struct list_head *list) +{ + D_FLAG_VERIFY(dentry, 0); + list_add(&dentry->d_lru, list); + dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; + this_cpu_inc(nr_dentry_unused); } /* - * Remove a dentry with references from the LRU. + * These can only be called under the global LRU lock, ie during the + * callback for freeing the LRU list. "isolate" removes it from the + * LRU lists entirely, while shrink_move moves it to the indicated + * private list. 
*/ -static void dentry_lru_del(struct dentry *dentry) +static void d_lru_isolate(struct dentry *dentry) { - if (!list_empty(&dentry->d_lru)) { - spin_lock(&dcache_lru_lock); - __dentry_lru_del(dentry); - spin_unlock(&dcache_lru_lock); - } + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags &= ~DCACHE_LRU_LIST; + this_cpu_dec(nr_dentry_unused); + list_del_init(&dentry->d_lru); } -static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) +static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list) { - spin_lock(&dcache_lru_lock); - if (list_empty(&dentry->d_lru)) { - dentry->d_flags |= DCACHE_LRU_LIST; - list_add_tail(&dentry->d_lru, list); - dentry->d_sb->s_nr_dentry_unused++; - dentry_stat.nr_unused++; - } else { - list_move_tail(&dentry->d_lru, list); + D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, list); +} + +/* + * dentry_lru_(add|del)_list) must be called with d_lock held. + */ +static void dentry_lru_add(struct dentry *dentry) +{ + if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + d_lru_add(dentry); +} + +/* + * Remove a dentry with references from the LRU. + * + * If we are on the shrink list, then we can get to try_prune_one_dentry() and + * lose our last reference through the parent walk. In this case, we need to + * remove ourselves from the shrink list, not the LRU. + */ +static void dentry_lru_del(struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (dentry->d_flags & DCACHE_SHRINK_LIST) + return d_shrink_del(dentry); + d_lru_del(dentry); } - spin_unlock(&dcache_lru_lock); } /** @@ -474,7 +542,8 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. 
*/ -static inline struct dentry *dentry_kill(struct dentry *dentry) +static inline struct dentry * +dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { struct inode *inode; @@ -483,8 +552,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry) inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { relock: - spin_unlock(&dentry->d_lock); - cpu_relax(); + if (unlock_on_failure) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + } return dentry; /* try again with same dentry */ } if (IS_ROOT(dentry)) @@ -567,7 +638,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry); + dentry = dentry_kill(dentry, 1); if (dentry) goto repeat; } @@ -787,12 +858,12 @@ EXPORT_SYMBOL(d_prune_aliases); * * This may fail if locks cannot be acquired no problem, just try again. */ -static void try_prune_one_dentry(struct dentry *dentry) +static struct dentry * try_prune_one_dentry(struct dentry *dentry) __releases(dentry->d_lock) { struct dentry *parent; - parent = dentry_kill(dentry); + parent = dentry_kill(dentry, 0); /* * If dentry_kill returns NULL, we have nothing more to do. * if it returns the same dentry, trylocks failed. In either @@ -804,17 +875,18 @@ static void try_prune_one_dentry(struct dentry *dentry) * fragmentation. */ if (!parent) - return; + return NULL; if (parent == dentry) - return; + return dentry; /* Prune ancestors. */ dentry = parent; while (dentry) { if (lockref_put_or_lock(&dentry->d_lockref)) - return; - dentry = dentry_kill(dentry); + return NULL; + dentry = dentry_kill(dentry, 1); } + return NULL; } static void shrink_dentry_list(struct list_head *list) @@ -826,6 +898,12 @@ static void shrink_dentry_list(struct list_head *list) dentry = list_entry_rcu(list->prev, struct dentry, d_lru); if (&dentry->d_lru == list) break; /* empty */ + + /* + * Get the dentry lock, and re-verify that the dentry is + * this on the shrinking list. 
If it is, we know that + * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. + */ spin_lock(&dentry->d_lock); if (dentry != list_entry(list->prev, struct dentry, d_lru)) { spin_unlock(&dentry->d_lock); @@ -833,76 +911,146 @@ static void shrink_dentry_list(struct list_head *list) } /* + * The dispose list is isolated and dentries are not accounted + * to the LRU here, so we can simply remove it from the list + * here regardless of whether it is referenced or not. + */ + d_shrink_del(dentry); + + /* * We found an inuse dentry which was not removed from - * the LRU because of laziness during lookup. Do not free - * it - just keep it off the LRU list. + * the LRU because of laziness during lookup. Do not free it. */ if (dentry->d_lockref.count) { - dentry_lru_del(dentry); spin_unlock(&dentry->d_lock); continue; } - rcu_read_unlock(); - try_prune_one_dentry(dentry); + /* + * If 'try_to_prune()' returns a dentry, it will + * be the same one we passed in, and d_lock will + * have been held the whole time, so it will not + * have been added to any other lists. We failed + * to get the inode lock. + * + * We just add it back to the shrink list. + */ + dentry = try_prune_one_dentry(dentry); rcu_read_lock(); + if (dentry) { + d_shrink_add(dentry, list); + spin_unlock(&dentry->d_lock); + } } rcu_read_unlock(); } +static enum lru_status +dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) +{ + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); + + + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; + + /* + * Referenced dentries are still in use. If they have active + * counts, just remove them from the LRU. Otherwise give them + * another pass through the LRU. 
+ */ + if (dentry->d_lockref.count) { + d_lru_isolate(dentry); + spin_unlock(&dentry->d_lock); + return LRU_REMOVED; + } + + if (dentry->d_flags & DCACHE_REFERENCED) { + dentry->d_flags &= ~DCACHE_REFERENCED; + spin_unlock(&dentry->d_lock); + + /* + * The list move itself will be made by the common LRU code. At + * this point, we've dropped the dentry->d_lock but keep the + * lru lock. This is safe to do, since every list movement is + * protected by the lru lock even if both locks are held. + * + * This is guaranteed by the fact that all LRU management + * functions are intermediated by the LRU API calls like + * list_lru_add and list_lru_del. List movement in this file + * only ever occur through this functions or through callbacks + * like this one, that are called from the LRU API. + * + * The only exceptions to this are functions like + * shrink_dentry_list, and code that first checks for the + * DCACHE_SHRINK_LIST flag. Those are guaranteed to be + * operating only with stack provided lists after they are + * properly isolated from the main list. It is thus, always a + * local access. + */ + return LRU_ROTATE; + } + + d_lru_shrink_move(dentry, freeable); + spin_unlock(&dentry->d_lock); + + return LRU_REMOVED; +} + /** * prune_dcache_sb - shrink the dcache * @sb: superblock - * @count: number of entries to try to free + * @nr_to_scan : number of entries to try to free + * @nid: which node to scan for freeable entities * - * Attempt to shrink the superblock dcache LRU by @count entries. This is + * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is * done when we need more memory an called from the superblock shrinker * function. * * This function may fail to free any resources if all the dentries are in * use. 
*/ -void prune_dcache_sb(struct super_block *sb, int count) +long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid) { - struct dentry *dentry; - LIST_HEAD(referenced); - LIST_HEAD(tmp); + LIST_HEAD(dispose); + long freed; -relock: - spin_lock(&dcache_lru_lock); - while (!list_empty(&sb->s_dentry_lru)) { - dentry = list_entry(sb->s_dentry_lru.prev, - struct dentry, d_lru); - BUG_ON(dentry->d_sb != sb); - - if (!spin_trylock(&dentry->d_lock)) { - spin_unlock(&dcache_lru_lock); - cpu_relax(); - goto relock; - } + freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate, + &dispose, &nr_to_scan); + shrink_dentry_list(&dispose); + return freed; +} - if (dentry->d_flags & DCACHE_REFERENCED) { - dentry->d_flags &= ~DCACHE_REFERENCED; - list_move(&dentry->d_lru, &referenced); - spin_unlock(&dentry->d_lock); - } else { - list_move_tail(&dentry->d_lru, &tmp); - dentry->d_flags |= DCACHE_SHRINK_LIST; - spin_unlock(&dentry->d_lock); - if (!--count) - break; - } - cond_resched_lock(&dcache_lru_lock); - } - if (!list_empty(&referenced)) - list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lru_lock); +static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, + spinlock_t *lru_lock, void *arg) +{ + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); + + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. 
If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; + + d_lru_shrink_move(dentry, freeable); + spin_unlock(&dentry->d_lock); - shrink_dentry_list(&tmp); + return LRU_REMOVED; } + /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock @@ -912,16 +1060,17 @@ relock: */ void shrink_dcache_sb(struct super_block *sb) { - LIST_HEAD(tmp); + long freed; - spin_lock(&dcache_lru_lock); - while (!list_empty(&sb->s_dentry_lru)) { - list_splice_init(&sb->s_dentry_lru, &tmp); - spin_unlock(&dcache_lru_lock); - shrink_dentry_list(&tmp); - spin_lock(&dcache_lru_lock); - } - spin_unlock(&dcache_lru_lock); + do { + LIST_HEAD(dispose); + + freed = list_lru_walk(&sb->s_dentry_lru, + dentry_lru_isolate_shrink, &dispose, UINT_MAX); + + this_cpu_sub(nr_dentry_unused, freed); + shrink_dentry_list(&dispose); + } while (freed > 0); } EXPORT_SYMBOL(shrink_dcache_sb); @@ -1283,8 +1432,13 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (dentry->d_lockref.count) { dentry_lru_del(dentry); } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - dentry_lru_move_list(dentry, &data->dispose); - dentry->d_flags |= DCACHE_SHRINK_LIST; + /* + * We can't use d_lru_shrink_move() because we + * need to get the global LRU lock and do the + * LRU accounting. + */ + d_lru_del(dentry); + d_shrink_add(dentry, &data->dispose); data->found++; ret = D_WALK_NORETRY; } @@ -2673,9 +2827,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) /** * prepend_name - prepend a pathname in front of current buffer pointer - * buffer: buffer pointer - * buflen: allocated length of the buffer - * name: name string and length qstr structure + * @buffer: buffer pointer + * @buflen: allocated length of the buffer + * @name: name string and length qstr structure * * With RCU path tracing, it may race with d_move(). 
Use ACCESS_ONCE() to * make sure that either the old or the new name pointer and length are @@ -2713,14 +2867,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * The function tries to write out the pathname without taking any lock other - * than the RCU read lock to make sure that dentries won't go away. It only - * checks the sequence number of the global rename_lock as any change in the - * dentry's d_seq will be preceded by changes in the rename_lock sequence - * number. If the sequence number had been change, it will restart the whole - * pathname back-tracing sequence again. It performs a total of 3 trials of - * lockless back-tracing sequences before falling back to take the - * rename_lock. + * The function will first try to write out the pathname without taking any + * lock other than the RCU read lock to make sure that dentries won't go away. + * It only checks the sequence number of the global rename_lock as any change + * in the dentry's d_seq will be preceded by changes in the rename_lock + * sequence number. If the sequence number had been changed, it will restart + * the whole pathname back-tracing sequence again by taking the rename_lock. + * In this case, there is no need to take the RCU read lock as the recursive + * parent pointer references will keep the dentry chain alive as long as no + * rename operation is performed. 
*/ static int prepend_path(const struct path *path, const struct path *root, @@ -2868,6 +3023,16 @@ static int prepend_unreachable(char **buffer, int *buflen) return prepend(buffer, buflen, "(unreachable)", 13); } +static void get_fs_root_rcu(struct fs_struct *fs, struct path *root) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + } while (read_seqcount_retry(&fs->seq, seq)); +} + /** * d_path - return the path of a dentry * @path: path to report @@ -2900,13 +3065,15 @@ char *d_path(const struct path *path, char *buf, int buflen) if (path->dentry->d_op && path->dentry->d_op->d_dname) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); - get_fs_root(current->fs, &root); + rcu_read_lock(); + get_fs_root_rcu(current->fs, &root); br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); + if (error < 0) res = ERR_PTR(error); - path_put(&root); return res; } EXPORT_SYMBOL(d_path); @@ -3014,6 +3181,18 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } +static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root, + struct path *pwd) +{ + unsigned seq; + + do { + seq = read_seqcount_begin(&fs->seq); + *root = fs->root; + *pwd = fs->pwd; + } while (read_seqcount_retry(&fs->seq, seq)); +} + /* * NOTE! The user-level library version returns a * character pointer. 
The kernel system call just @@ -3036,23 +3215,25 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { int error; struct path pwd, root; - char *page = (char *) __get_free_page(GFP_USER); + char *page = __getname(); if (!page) return -ENOMEM; - get_fs_root_and_pwd(current->fs, &root, &pwd); + rcu_read_lock(); + get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; - char *cwd = page + PAGE_SIZE; - int buflen = PAGE_SIZE; + char *cwd = page + PATH_MAX; + int buflen = PATH_MAX; prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); if (error < 0) goto out; @@ -3065,7 +3246,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } error = -ERANGE; - len = PAGE_SIZE + page - cwd; + len = PATH_MAX + page - cwd; if (len <= size) { error = len; if (copy_to_user(buf, cwd, len)) @@ -3073,12 +3254,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { br_read_unlock(&vfsmount_lock); + rcu_read_unlock(); } out: - path_put(&pwd); - path_put(&root); - free_page((unsigned long) page); + __putname(page); return error; } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index c00e055b6282..9fd702f5bfb2 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -44,6 +44,7 @@ static void drop_slab(void) .gfp_mask = GFP_KERNEL, }; + nodes_setall(shrink.nodes_to_scan); do { nr_objects = shrink_slab(&shrink, 1000, 1000); } while (nr_objects > 10); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 2ec8eb1ab269..a52a5d23c30b 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -861,7 +861,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc) static void _write_failed(struct inode *inode, loff_t to) { if (to > inode->i_size) - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); } int 
exofs_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0a87bb10998d..c260de6d7b6d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -58,7 +58,7 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); ext2_truncate_blocks(inode, inode->i_size); } } diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 2d1bdbe78c04..3981ff783950 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *ei; struct list_head *cur, *tmp; LIST_HEAD(skipped); - int ret, nr_shrunk = 0; + int nr_shrunk = 0; int retried = 0, skip_precached = 1, nr_skipped = 0; spin_lock(&sbi->s_es_lru_lock); retry: list_for_each_safe(cur, tmp, &sbi->s_es_lru) { + int shrunk; + /* * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. 
@@ -964,13 +966,13 @@ retry: continue; write_lock(&ei->i_es_lock); - ret = __es_try_to_reclaim_extents(ei, nr_to_scan); + shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); if (ei->i_es_lru_nr == 0) list_del_init(&ei->i_es_lru); write_unlock(&ei->i_es_lock); - nr_shrunk += ret; - nr_to_scan -= ret; + nr_shrunk += shrunk; + nr_to_scan -= shrunk; if (nr_to_scan == 0) break; } @@ -1007,7 +1009,20 @@ retry: return nr_shrunk; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long ext4_es_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr; + struct ext4_sb_info *sbi; + + sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); + nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); + return nr; +} + +static unsigned long ext4_es_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); @@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; + return nr_shrunk; } void ext4_es_register_shrinker(struct ext4_sb_info *sbi) @@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); sbi->s_es_last_sorted = 0; - sbi->s_es_shrinker.shrink = ext4_es_shrink; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; + sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; register_shrinker(&sbi->s_es_shrinker); } @@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, struct ext4_es_tree *tree = &ei->i_es_tree; struct rb_node *node; struct extent_status *es; - int 
nr_shrunk = 0; + unsigned long nr_shrunk = 0; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c79fd7dabe79..0d424d7ac02b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4587,7 +4587,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { handle_t *handle; - loff_t oldsize = inode->i_size; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -4650,7 +4649,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. */ - truncate_pagecache(inode, oldsize, inode->i_size); + truncate_pagecache(inode, inode->i_size); } /* * We want to call ext4_truncate() even if attr->ia_size == diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 11b51bb55b42..0062da21dd8b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -147,7 +147,7 @@ static void fat_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); fat_truncate_blocks(inode, inode->i_size); } } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 30f6f27d5a59..9f4935b8f208 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -69,7 +69,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { struct super_block *sb = inode->i_sb; - if (strcmp(sb->s_type->name, "bdev") == 0) + if (sb_is_blkdev_sb(sb)) return inode->i_mapping->backing_dev_info; return sb->s_bdi; @@ -251,11 +251,13 @@ static int move_expired_inodes(struct list_head *delaying_queue, if (work->older_than_this && inode_dirtied_after(inode, *work->older_than_this)) break; + list_move(&inode->i_wb_list, &tmp); + moved++; + if 
(sb_is_blkdev_sb(inode->i_sb)) + continue; if (sb && sb != inode->i_sb) do_sb_sort = 1; sb = inode->i_sb; - list_move(&inode->i_wb_list, &tmp); - moved++; } /* just one sb in list, splice to dispatch_queue and we're done */ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 318e8433527c..b2a86e324aac 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -586,7 +586,8 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) fscache_operation_init(op, NULL, NULL); op->flags = FSCACHE_OP_MYTHREAD | - (1 << FSCACHE_OP_WAITING); + (1 << FSCACHE_OP_WAITING) | + (1 << FSCACHE_OP_UNUSE_COOKIE); spin_lock(&cookie->lock); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3ac91086f41f..62b43b577bfc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1678,7 +1678,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { - truncate_pagecache(inode, oldsize, outarg.attr.size); + truncate_pagecache(inode, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 84434594e80e..a8ce6dab60a0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -218,7 +218,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, bool inval = false; if (oldsize != attr->size) { - truncate_pagecache(inode, oldsize, attr->size); + truncate_pagecache(inode, attr->size); inval = true; } else if (fc->auto_inval_data) { struct timespec new_mtime = { diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5e2f56fccf6b..62a65fc448dc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1016,7 +1016,7 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize chunk = oldsize - newsize; if (chunk > max_chunk) chunk = max_chunk; - truncate_pagecache(inode, oldsize, oldsize - chunk); + truncate_pagecache(inode, oldsize - chunk); oldsize -= chunk; 
gfs2_trans_end(sdp); error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); @@ -1067,7 +1067,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) if (journaled) error = gfs2_journaled_truncate(inode, oldsize, newsize); else - truncate_pagecache(inode, oldsize, newsize); + truncate_pagecache(inode, newsize); if (error) { brelse(dibh); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 722329cac98f..c2f41b4d00b9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1427,21 +1427,22 @@ __acquires(&lru_lock) * gfs2_dispose_glock_lru() above. */ -static void gfs2_scan_glock_lru(int nr) +static long gfs2_scan_glock_lru(int nr) { struct gfs2_glock *gl; LIST_HEAD(skipped); LIST_HEAD(dispose); + long freed = 0; spin_lock(&lru_lock); - while(nr && !list_empty(&lru_list)) { + while ((nr-- >= 0) && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); - nr--; + freed++; continue; } @@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr) if (!list_empty(&dispose)) gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); + + return freed; } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { - if (sc->nr_to_scan) { - if (!(sc->gfp_mask & __GFP_FS)) - return -1; - gfs2_scan_glock_lru(sc->nr_to_scan); - } + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} - return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker glock_shrinker = { - .shrink = gfs2_shrink_glock_memory, .seeks = DEFAULT_SEEKS, 
+ .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, }; /** diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 64915eeae5a7..ced3257f06e8 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -694,8 +694,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); d_instantiate(dentry, inode); - if (file) + if (file) { + *opened |= FILE_CREATED; error = finish_open(file, dentry, gfs2_open_common, opened); + } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); return error; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7b0f5043cf24..351586e24e30 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -32,7 +32,8 @@ struct workqueue_struct *gfs2_control_wq; static struct shrinker qd_shrinker = { - .shrink = gfs2_shrink_qd_memory, + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3768c2f40e43..db441359ee8c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(qd_lru_lock); -int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) +unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; int nr_to_scan = sc->nr_to_scan; - - if (nr_to_scan == 0) - goto out; + long freed = 0; if (!(sc->gfp_mask & __GFP_FS)) - return -1; + return SHRINK_STOP; spin_lock(&qd_lru_lock); while (nr_to_scan && !list_empty(&qd_lru_list)) { @@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&qd_lru_lock); nr_to_scan--; + freed++; } spin_unlock(&qd_lru_lock); + return freed; +} -out: - return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; +unsigned long 
gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&qd_lru_count)); } static u64 qd2index(struct gfs2_quota_data *qd) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed83..0f64d9deb1b0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, - struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f9299d8a64e3..380ab31b5e0f 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -41,7 +41,7 @@ static void hfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); hfs_file_truncate(inode); } } diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 4d2edaea891c..37213d075f3c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -36,7 +36,7 @@ static void hfsplus_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); hfsplus_file_truncate(inode); } } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 4e9dabcf1f4c..67c1a61e0955 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -138,7 +138,7 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to) hpfs_lock(inode->i_sb); if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); hpfs_truncate(inode); } diff --git 
a/fs/inode.c b/fs/inode.c index 93a0625b46e4..b33ba8e021cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -17,6 +17,7 @@ #include <linux/prefetch.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> +#include <linux/list_lru.h> #include "internal.h" /* @@ -24,7 +25,7 @@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() - * inode->i_sb->s_inode_lru_lock protects: + * Inode LRU list locks protect: * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list @@ -37,7 +38,7 @@ * * inode_sb_list_lock * inode->i_lock - * inode->i_sb->s_inode_lru_lock + * Inode LRU list locks * * bdi->wb.list_lock * inode->i_lock @@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops); */ struct inodes_stat_t inodes_stat; -static DEFINE_PER_CPU(unsigned int, nr_inodes); -static DEFINE_PER_CPU(unsigned int, nr_unused); +static DEFINE_PER_CPU(unsigned long, nr_inodes); +static DEFINE_PER_CPU(unsigned long, nr_unused); static struct kmem_cache *inode_cachep __read_mostly; -static int get_nr_inodes(void) +static long get_nr_inodes(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_inodes, i); return sum < 0 ? 0 : sum; } -static inline int get_nr_inodes_unused(void) +static inline long get_nr_inodes_unused(void) { int i; - int sum = 0; + long sum = 0; for_each_possible_cpu(i) sum += per_cpu(nr_unused, i); return sum < 0 ? 0 : sum; } -int get_nr_dirty_inodes(void) +long get_nr_dirty_inodes(void) { /* not actually dirty inodes, but a wild approximation */ - int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); return nr_dirty > 0 ? 
nr_dirty : 0; } @@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write, { inodes_stat.nr_inodes = get_nr_inodes(); inodes_stat.nr_unused = get_nr_inodes_unused(); - return proc_dointvec(table, write, buffer, lenp, ppos); + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #endif @@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_lru_lock); - if (list_empty(&inode->i_lru)) { - list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); - inode->i_sb->s_nr_inodes_unused++; + if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); - } - spin_unlock(&inode->i_sb->s_inode_lru_lock); } /* @@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode) static void inode_lru_list_del(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_lru_lock); - if (!list_empty(&inode->i_lru)) { - list_del_init(&inode->i_lru); - inode->i_sb->s_nr_inodes_unused--; + + if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); - } - spin_unlock(&inode->i_sb->s_inode_lru_lock); } /** @@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) return busy; } -static int can_unuse(struct inode *inode) -{ - if (inode->i_state & ~I_REFERENCED) - return 0; - if (inode_has_buffers(inode)) - return 0; - if (atomic_read(&inode->i_count)) - return 0; - if (inode->i_data.nrpages) - return 0; - return 1; -} - /* - * Walk the superblock inode LRU for freeable inodes and attempt to free them. - * This is called from the superblock shrinker function with a number of inodes - * to trim from the LRU. Inodes to be freed are moved to a temporary list and - * then are freed outside inode_lock by dispose_list(). + * Isolate the inode from the LRU in preparation for freeing it. * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. 
If the inode has metadata buffers attached to @@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode) * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ -void prune_icache_sb(struct super_block *sb, int nr_to_scan) +static enum lru_status +inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg) { - LIST_HEAD(freeable); - int nr_scanned; - unsigned long reap = 0; + struct list_head *freeable = arg; + struct inode *inode = container_of(item, struct inode, i_lru); - spin_lock(&sb->s_inode_lru_lock); - for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { - struct inode *inode; + /* + * we are inverting the lru lock/inode->i_lock here, so use a trylock. + * If we fail to get the lock, just skip it. + */ + if (!spin_trylock(&inode->i_lock)) + return LRU_SKIP; - if (list_empty(&sb->s_inode_lru)) - break; + /* + * Referenced or dirty inodes are still in use. Give them another pass + * through the LRU as we canot reclaim them now. + */ + if (atomic_read(&inode->i_count) || + (inode->i_state & ~I_REFERENCED)) { + list_del_init(&inode->i_lru); + spin_unlock(&inode->i_lock); + this_cpu_dec(nr_unused); + return LRU_REMOVED; + } - inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { + inode->i_state &= ~I_REFERENCED; + spin_unlock(&inode->i_lock); + return LRU_ROTATE; + } - /* - * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, - * so use a trylock. If we fail to get the lock, just move the - * inode to the back of the list so we don't spin on it. 
- */ - if (!spin_trylock(&inode->i_lock)) { - list_move(&inode->i_lru, &sb->s_inode_lru); - continue; + if (inode_has_buffers(inode) || inode->i_data.nrpages) { + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(lru_lock); + if (remove_inode_buffers(inode)) { + unsigned long reap; + reap = invalidate_mapping_pages(&inode->i_data, 0, -1); + if (current_is_kswapd()) + __count_vm_events(KSWAPD_INODESTEAL, reap); + else + __count_vm_events(PGINODESTEAL, reap); + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += reap; } + iput(inode); + spin_lock(lru_lock); + return LRU_RETRY; + } - /* - * Referenced or dirty inodes are still in use. Give them - * another pass through the LRU as we canot reclaim them now. - */ - if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED)) { - list_del_init(&inode->i_lru); - spin_unlock(&inode->i_lock); - sb->s_nr_inodes_unused--; - this_cpu_dec(nr_unused); - continue; - } + WARN_ON(inode->i_state & I_NEW); + inode->i_state |= I_FREEING; + list_move(&inode->i_lru, freeable); + spin_unlock(&inode->i_lock); - /* recently referenced inodes get one more pass */ - if (inode->i_state & I_REFERENCED) { - inode->i_state &= ~I_REFERENCED; - list_move(&inode->i_lru, &sb->s_inode_lru); - spin_unlock(&inode->i_lock); - continue; - } - if (inode_has_buffers(inode) || inode->i_data.nrpages) { - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_lru_lock); - if (remove_inode_buffers(inode)) - reap += invalidate_mapping_pages(&inode->i_data, - 0, -1); - iput(inode); - spin_lock(&sb->s_inode_lru_lock); - - if (inode != list_entry(sb->s_inode_lru.next, - struct inode, i_lru)) - continue; /* wrong inode or list_empty */ - /* avoid lock inversions with trylock */ - if (!spin_trylock(&inode->i_lock)) - continue; - if (!can_unuse(inode)) { - spin_unlock(&inode->i_lock); - continue; - } - } - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); + 
this_cpu_dec(nr_unused); + return LRU_REMOVED; +} - list_move(&inode->i_lru, &freeable); - sb->s_nr_inodes_unused--; - this_cpu_dec(nr_unused); - } - if (current_is_kswapd()) - __count_vm_events(KSWAPD_INODESTEAL, reap); - else - __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&sb->s_inode_lru_lock); - if (current->reclaim_state) - current->reclaim_state->reclaimed_slab += reap; +/* + * Walk the superblock inode LRU for freeable inodes and attempt to free them. + * This is called from the superblock shrinker function with a number of inodes + * to trim from the LRU. Inodes to be freed are moved to a temporary list and + * then are freed outside inode_lock by dispose_list(). + */ +long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid) +{ + LIST_HEAD(freeable); + long freed; + freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, + &freeable, &nr_to_scan); dispose_list(&freeable); + return freed; } static void __wait_on_freeing_inode(struct inode *inode); diff --git a/fs/internal.h b/fs/internal.h index 2be46ea5dd0b..513e0d859a6c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f); * inode.c */ extern spinlock_t inode_sb_list_lock; +extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, + int nid); extern void inode_add_lru(struct inode *inode); /* @@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode); */ extern void inode_wb_list_del(struct inode *inode); -extern int get_nr_dirty_inodes(void); +extern long get_nr_dirty_inodes(void); extern void evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *, bool); @@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool); */ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); +extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan, + int 
nid); /* * read_write.c diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 730f24e282a6..f4aab719add5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -306,7 +306,7 @@ static void jfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); jfs_truncate(inode); } } diff --git a/fs/mbcache.c b/fs/mbcache.c index 8c32ef3ba88e..e519e45bf673 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list); static LIST_HEAD(mb_cache_lru_list); static DEFINE_SPINLOCK(mb_cache_spinlock); -/* - * What the mbcache registers as to get shrunk dynamically. - */ - -static int mb_cache_shrink_fn(struct shrinker *shrink, - struct shrink_control *sc); - -static struct shrinker mb_cache_shrinker = { - .shrink = mb_cache_shrink_fn, - .seeks = DEFAULT_SEEKS, -}; - static inline int __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) { @@ -151,7 +139,7 @@ forget: /* - * mb_cache_shrink_fn() memory pressure callback + * mb_cache_shrink_scan() memory pressure callback * * This function is called by the kernel memory management when memory * gets low. @@ -159,17 +147,16 @@ forget: * @shrink: (ignored) * @sc: shrink_control passed from reclaim * - * Returns the number of objects which are present in the cache. + * Returns the number of objects freed. 
*/ -static int -mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free_list); - struct mb_cache *cache; struct mb_cache_entry *entry, *tmp; - int count = 0; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed = 0; mb_debug("trying to free %d entries", nr_to_scan); spin_lock(&mb_cache_spinlock); @@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) struct mb_cache_entry, e_lru_list); list_move_tail(&ce->e_lru_list, &free_list); __mb_cache_entry_unhash(ce); + freed++; + } + spin_unlock(&mb_cache_spinlock); + list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { + __mb_cache_entry_forget(entry, gfp_mask); } + return freed; +} + +static unsigned long +mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) +{ + struct mb_cache *cache; + unsigned long count = 0; + + spin_lock(&mb_cache_spinlock); list_for_each_entry(cache, &mb_cache_list, c_cache_list) { mb_debug("cache %s (%d)", cache->c_name, atomic_read(&cache->c_entry_count)); count += atomic_read(&cache->c_entry_count); } spin_unlock(&mb_cache_spinlock); - list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { - __mb_cache_entry_forget(entry, gfp_mask); - } - return (count / 100) * sysctl_vfs_cache_pressure; + + return vfs_pressure_ratio(count); } +static struct shrinker mb_cache_shrinker = { + .count_objects = mb_cache_shrink_count, + .scan_objects = mb_cache_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; /* * mb_cache_create() create a new cache diff --git a/fs/minix/inode.c b/fs/minix/inode.c index df122496f328..0332109162a5 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -400,7 +400,7 @@ static void minix_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + 
truncate_pagecache(inode, inode->i_size); minix_truncate(inode); } } diff --git a/fs/namei.c b/fs/namei.c index 409a441ba2ae..645268f23eb6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -660,29 +660,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd) } } -static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) -{ - int ret; - - if (IS_ERR(link)) - goto fail; - - if (*link == '/') { - set_root(nd); - path_put(&nd->path); - nd->path = nd->root; - path_get(&nd->root); - nd->flags |= LOOKUP_JUMPED; - } - nd->inode = nd->path.dentry->d_inode; - - ret = link_path_walk(link, nd); - return ret; -fail: - path_put(&nd->path); - return PTR_ERR(link); -} - static void path_put_conditional(struct path *path, struct nameidata *nd) { dput(path->dentry); @@ -874,7 +851,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p) error = 0; s = nd_get_link(nd); if (s) { - error = __vfs_follow_link(nd, s); + if (unlikely(IS_ERR(s))) { + path_put(&nd->path); + put_link(nd, link, *p); + return PTR_ERR(s); + } + if (*s == '/') { + set_root(nd); + path_put(&nd->path); + nd->path = nd->root; + path_get(&nd->root); + nd->flags |= LOOKUP_JUMPED; + } + nd->inode = nd->path.dentry->d_inode; + error = link_path_walk(s, nd); if (unlikely(error)) put_link(nd, link, *p); } @@ -2271,12 +2261,15 @@ mountpoint_last(struct nameidata *nd, struct path *path) dentry = d_alloc(dir, &nd->last); if (!dentry) { error = -ENOMEM; + mutex_unlock(&dir->d_inode->i_mutex); goto out; } dentry = lookup_real(dir->d_inode, dentry, nd->flags); error = PTR_ERR(dentry); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + mutex_unlock(&dir->d_inode->i_mutex); goto out; + } } mutex_unlock(&dir->d_inode->i_mutex); @@ -2663,6 +2656,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, int acc_mode; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; + bool excl; BUG_ON(dentry->d_inode); @@ -2676,10 +2670,9 @@ static int 
atomic_open(struct nameidata *nd, struct dentry *dentry, if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); - if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { + excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); + if (excl) open_flag &= ~O_TRUNC; - *opened |= FILE_CREATED; - } /* * Checking write permission is tricky, bacuse we don't know if we are @@ -2732,12 +2725,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, goto out; } - acc_mode = op->acc_mode; - if (*opened & FILE_CREATED) { - fsnotify_create(dir, dentry); - acc_mode = MAY_OPEN; - } - if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; @@ -2747,9 +2734,19 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, dput(dentry); dentry = file->f_path.dentry; } - if (create_error && dentry->d_inode == NULL) { - error = create_error; - goto out; + if (*opened & FILE_CREATED) + fsnotify_create(dir, dentry); + if (!dentry->d_inode) { + WARN_ON(*opened & FILE_CREATED); + if (create_error) { + error = create_error; + goto out; + } + } else { + if (excl && !(*opened & FILE_CREATED)) { + error = -EEXIST; + goto out; + } } goto looked_up; } @@ -2758,6 +2755,12 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. 
*/ + acc_mode = op->acc_mode; + if (*opened & FILE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); + fsnotify_create(dir, dentry); + acc_mode = MAY_OPEN; + } error = may_open(&file->f_path, acc_mode, open_flag); if (error) fput(file); @@ -4236,11 +4239,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) return res; } -int vfs_follow_link(struct nameidata *nd, const char *link) -{ - return __vfs_follow_link(nd, link); -} - /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { @@ -4352,7 +4350,6 @@ EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(unlock_rename); EXPORT_SYMBOL(vfs_create); -EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(vfs_link); EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mknod); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e79bc6ce828e..854a8f05a610 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1392,6 +1392,9 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; + if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + *opened |= FILE_CREATED; + err = finish_open(file, dentry, do_open, opened); if (err) goto out; @@ -2006,17 +2009,18 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc) +unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + long freed = 0; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 
0 : -1; + return SHRINK_STOP; spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { @@ -2032,6 +2036,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, struct nfs_access_entry, lru); list_move(&cache->lru, &head); rb_erase(&cache->rb_node, &nfsi->access_cache); + freed++; if (!list_empty(&nfsi->access_cache_entry_lru)) list_move_tail(&nfsi->access_cache_inode_lru, &nfs_access_lru_list); @@ -2046,7 +2051,13 @@ remove_lru_entry: } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); - return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; + return freed; +} + +unsigned long +nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0bd7a55a5f07..91ff089d3412 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -130,7 +130,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; #else - VM_BUG_ON(iocb->ki_left != PAGE_SIZE); VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ || rw == KERNEL_READ) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 87e797640828..eda8879171c4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -541,7 +541,6 @@ EXPORT_SYMBOL_GPL(nfs_setattr); */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { - loff_t oldsize; int err; err = inode_newsize_ok(inode, offset); @@ -549,11 +548,10 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) goto out; spin_lock(&inode->i_lock); - oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); - truncate_pagecache(inode, oldsize, offset); + truncate_pagecache(inode, offset); out: return err; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 
d388302c005f..38da8c2b81ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const char *ip_addr); /* dir.c */ -extern int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc); +extern unsigned long nfs_access_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); int nfs_create(struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct inode *, struct dentry *, umode_t); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index f520a1113b38..28842abafab4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -279,15 +279,15 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { spin_lock(&clp->cl_lock); if (clp->cl_machine_cred != NULL) - newcred = get_rpccred(clp->cl_machine_cred); + /* don't call get_rpccred on the machine cred - + * a reference will be held for life of clp */ + newcred = clp->cl_machine_cred; spin_unlock(&clp->cl_lock); - if (msg->rpc_cred) - put_rpccred(msg->rpc_cred); msg->rpc_cred = newcred; flavor = clp->cl_rpcclient->cl_auth->au_flavor; - WARN_ON(flavor != RPC_AUTH_GSS_KRB5I && - flavor != RPC_AUTH_GSS_KRB5P); + WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I && + flavor != RPC_AUTH_GSS_KRB5P); *clntp = clp->cl_rpcclient; return true; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39b6cf2d1683..989bb9d3074d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6001,10 +6001,12 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_resp = &res, }; struct rpc_clnt *clnt = NFS_SERVER(dir)->client; + struct rpc_cred *cred = NULL; if (use_integrity) { clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; - msg.rpc_cred = 
nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + msg.rpc_cred = cred; } dprintk("NFS call secinfo %s\n", name->name); @@ -6016,8 +6018,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); - if (msg.rpc_cred) - put_rpccred(msg.rpc_cred); + if (cred) + put_rpccred(cred); return status; } @@ -6151,11 +6153,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { }, .allow.u.words = { [0] = 1 << (OP_CLOSE) | - 1 << (OP_LOCKU), + 1 << (OP_LOCKU) | + 1 << (OP_COMMIT), [1] = 1 << (OP_SECINFO - 32) | 1 << (OP_SECINFO_NO_NAME - 32) | 1 << (OP_TEST_STATEID - 32) | - 1 << (OP_FREE_STATEID - 32) + 1 << (OP_FREE_STATEID - 32) | + 1 << (OP_WRITE - 32) } }; @@ -7496,11 +7500,13 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; struct rpc_clnt *clnt = server->client; + struct rpc_cred *cred = NULL; int status; if (use_integrity) { clnt = server->nfs_client->cl_rpcclient; - msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client); + cred = nfs4_get_clid_cred(server->nfs_client); + msg.rpc_cred = cred; } dprintk("--> %s\n", __func__); @@ -7508,8 +7514,8 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, &res.seq_res, 0); dprintk("<-- %s status=%d\n", __func__, status); - if (msg.rpc_cred) - put_rpccred(msg.rpc_cred); + if (cred) + put_rpccred(cred); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fbdad9e1719f..79210d23f607 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -414,7 +414,7 @@ static int nfs4_stat_to_errno(int); #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) -#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz) 
#else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -5966,21 +5966,8 @@ out: static int decode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_res *res) { - __be32 *p; - int status; - - status = decode_op_hdr(xdr, OP_FREE_STATEID); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - res->status = be32_to_cpup(p++); + res->status = decode_op_hdr(xdr, OP_FREE_STATEID); return res->status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5793f24613c8..a03b9c6f9489 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void) #endif static struct shrinker acl_shrinker = { - .shrink = nfs_access_cache_shrinker, + .count_objects = nfs_access_cache_count, + .scan_objects = nfs_access_cache_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..9186c7ce0b14 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -59,11 +59,14 @@ static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static void cache_cleaner_func(struct work_struct *unused); -static int nfsd_reply_cache_shrink(struct shrinker *shrink, - struct shrink_control *sc); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker nfsd_reply_cache_shrinker = { - .shrink = nfsd_reply_cache_shrink, + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, .seeks = 1, }; @@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp) * Walk the LRU list and prune off entries that are older than RC_EXPIRE. 
* Also prune the oldest ones when the total exceeds the max number of entries. */ -static void +static long prune_cache_entries(void) { struct svc_cacherep *rp, *tmp; + long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { if (!nfsd_cache_entry_expired(rp) && num_drc_entries <= max_drc_entries) break; nfsd_reply_cache_free_locked(rp); + freed++; } /* @@ -254,6 +259,7 @@ prune_cache_entries(void) cancel_delayed_work(&cache_cleaner); else mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; } static void @@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused) spin_unlock(&cache_lock); } -static int -nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned int num; + unsigned long num; spin_lock(&cache_lock); - if (sc->nr_to_scan) - prune_cache_entries(); num = num_drc_entries; spin_unlock(&cache_lock); return num; } +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b1a5277cfd18..7e350c562e0e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -254,7 +254,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); nilfs_truncate(inode); } } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index c5670b8d198c..ea4ba9daeb47 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1768,7 +1768,7 @@ static void ntfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > 
inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); ntfs_truncate_vfs(inode); } } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4f8197caa487..d71903c6068b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2242,7 +2242,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, file->f_path.dentry->d_name.name, (unsigned int)nr_segs); - if (iocb->ki_left == 0) + if (iocb->ki_nbytes == 0) return 0; appending = file->f_flags & O_APPEND ? 1 : 0; @@ -2293,7 +2293,7 @@ relock: can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(file, ppos, - iocb->ki_left, appending, + iocb->ki_nbytes, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2301,7 +2301,7 @@ relock: } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left, + unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, *ppos); /* diff --git a/fs/omfs/file.c b/fs/omfs/file.c index e0d9b3e722bd..54d57d6ba68d 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -311,7 +311,7 @@ static void omfs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); omfs_truncate(inode); } } diff --git a/fs/open.c b/fs/open.c index 2a731b0d08bc..d420331ca32a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -744,14 +744,24 @@ cleanup_file: /** * finish_open - finish opening a file - * @od: opaque open data + * @file: file pointer * @dentry: pointer to dentry * @open: open callback + * @opened: state of open * * This can be used to finish opening a file passed to i_op->atomic_open(). * * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. + * + * NB: the dentry reference is _not_ consumed. 
If, for example, the dentry is + * the return value of d_splice_alias(), then the caller needs to perform dput() + * on it after finish_open(). + * + * On successful return @file is a fully instantiated open file. After this, if + * an error occurs in ->atomic_open(), it needs to clean up with fput(). + * + * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), @@ -772,11 +782,16 @@ EXPORT_SYMBOL(finish_open); /** * finish_no_open - finish ->atomic_open() without opening the file * - * @od: opaque open data + * @file: file pointer * @dentry: dentry or NULL (as returned from ->lookup()) * * This can be used to set the result of a successful lookup in ->atomic_open(). - * The filesystem's atomic_open() method shall return NULL after calling this. + * + * NB: unlike finish_open() this function does consume the dentry reference and + * the caller need not dput() it. + * + * Returns "1" which must be the return value of ->atomic_open() after having + * called this function. 
*/ int finish_no_open(struct file *file, struct dentry *dentry) { diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5aa847a603c0..59d85d608898 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -132,13 +132,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - K(global_page_state(NR_ANON_PAGES) - + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR), -#else K(global_page_state(NR_ANON_PAGES)), -#endif K(global_page_state(NR_FILE_MAPPED)), K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 4ffb7ab5e397..b8e93a40a5d3 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -168,7 +168,7 @@ static int pstore_decompress(void *in, void *out, size_t inlen, size_t outlen) int err, ret; ret = -EIO; - err = zlib_inflateInit(&stream); + err = zlib_inflateInit2(&stream, WINDOW_BITS); if (err != Z_OK) goto error; @@ -195,8 +195,29 @@ error: static void allocate_buf_for_compression(void) { size_t size; + size_t cmpr; + + switch (psinfo->bufsize) { + /* buffer range for efivars */ + case 1000 ... 2000: + cmpr = 56; + break; + case 2001 ... 3000: + cmpr = 54; + break; + case 3001 ... 3999: + cmpr = 52; + break; + /* buffer range for nvram, erst */ + case 4000 ... 
10000: + cmpr = 45; + break; + default: + cmpr = 60; + break; + } - big_oops_buf_sz = (psinfo->bufsize * 100) / 45; + big_oops_buf_sz = (psinfo->bufsize * 100) / cmpr; big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); if (big_oops_buf) { size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL), @@ -295,10 +316,6 @@ static void pstore_dump(struct kmsg_dumper *dumper, compressed = true; total_len = zipped_len; } else { - pr_err("pstore: compression failed for Part %d" - " returned %d\n", part, zipped_len); - pr_err("pstore: Capture uncompressed" - " oops/panic report of Part %d\n", part); compressed = false; total_len = copy_kmsg_to_buffer(hsize, len); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9a702e193538..831d49a4111f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type) } EXPORT_SYMBOL(dquot_quota_sync); -/* Free unused dquots from cache */ -static void prune_dqcache(int count) +static unsigned long +dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct list_head *head; struct dquot *dquot; + unsigned long freed = 0; head = free_dquots.prev; - while (head != &free_dquots && count) { + while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); - count--; + sc->nr_to_scan--; + freed++; head = free_dquots.prev; } + return freed; } -/* - * This is called from kswapd when we think we need some - * more memory - */ -static int shrink_dqcache_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long +dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - int nr = sc->nr_to_scan; - - if (nr) { - spin_lock(&dq_list_lock); - prune_dqcache(nr); - spin_unlock(&dq_list_lock); - } - return ((unsigned) - percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]) - 
/100) * sysctl_vfs_cache_pressure; + return vfs_pressure_ratio( + percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } static struct shrinker dqcache_shrinker = { - .shrink = shrink_dqcache_memory, + .count_objects = dqcache_shrink_count, + .scan_objects = dqcache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/read_write.c b/fs/read_write.c index 122a3846d9e1..e3cd280b158c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -367,7 +367,6 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_left = len; kiocb.ki_nbytes = len; ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); @@ -417,7 +416,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_left = len; kiocb.ki_nbytes = len; ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); @@ -599,7 +597,6 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - kiocb.ki_left = len; kiocb.ki_nbytes = len; ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); diff --git a/fs/super.c b/fs/super.c index f6961ea84c56..3a96c9783a8b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we * take a passive reference to the superblock to avoid this from occurring. 
*/ -static int prune_super(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long super_cache_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct super_block *sb; - int fs_objects = 0; - int total_objects; + long fs_objects = 0; + long total_objects; + long freed = 0; + long dentries; + long inodes; sb = container_of(shrink, struct super_block, s_shrink); @@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) * Deadlock avoidance. We may hold various FS locks, and we don't want * to recurse into the FS that called us in clear_inode() and friends.. */ - if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS)) - return -1; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; if (!grab_super_passive(sb)) - return -1; + return SHRINK_STOP; if (sb->s_op->nr_cached_objects) - fs_objects = sb->s_op->nr_cached_objects(sb); - - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects + 1; - - if (sc->nr_to_scan) { - int dentries; - int inodes; - - /* proportion the scan between the caches */ - dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / - total_objects; - inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) / - total_objects; - if (fs_objects) - fs_objects = (sc->nr_to_scan * fs_objects) / - total_objects; - /* - * prune the dcache first as the icache is pinned by it, then - * prune the icache, followed by the filesystem specific caches - */ - prune_dcache_sb(sb, dentries); - prune_icache_sb(sb, inodes); + fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid); - if (fs_objects && sb->s_op->free_cached_objects) { - sb->s_op->free_cached_objects(sb, fs_objects); - fs_objects = sb->s_op->nr_cached_objects(sb); - } - total_objects = sb->s_nr_dentry_unused + - sb->s_nr_inodes_unused + fs_objects; + inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); + dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); + total_objects = dentries + inodes + fs_objects + 1; + + /* 
proportion the scan between the caches */ + dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); + inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); + + /* + * prune the dcache first as the icache is pinned by it, then + * prune the icache, followed by the filesystem specific caches + */ + freed = prune_dcache_sb(sb, dentries, sc->nid); + freed += prune_icache_sb(sb, inodes, sc->nid); + + if (fs_objects) { + fs_objects = mult_frac(sc->nr_to_scan, fs_objects, + total_objects); + freed += sb->s_op->free_cached_objects(sb, fs_objects, + sc->nid); } - total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure; + drop_super(sb); + return freed; +} + +static unsigned long super_cache_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + struct super_block *sb; + long total_objects = 0; + + sb = container_of(shrink, struct super_block, s_shrink); + + if (!grab_super_passive(sb)) + return 0; + + if (sb->s_op && sb->s_op->nr_cached_objects) + total_objects = sb->s_op->nr_cached_objects(sb, + sc->nid); + + total_objects += list_lru_count_node(&sb->s_dentry_lru, + sc->nid); + total_objects += list_lru_count_node(&sb->s_inode_lru, + sc->nid); + + total_objects = vfs_pressure_ratio(total_objects); drop_super(sb); return total_objects; } @@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); - INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_inode_lru); - spin_lock_init(&s->s_inode_lru_lock); + + if (list_lru_init(&s->s_dentry_lru)) + goto err_out; + if (list_lru_init(&s->s_inode_lru)) + goto err_out_dentry_lru; + INIT_LIST_HEAD(&s->s_mounts); init_rwsem(&s->s_umount); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s->cleancache_poolid = -1; s->s_shrink.seeks = DEFAULT_SEEKS; - 
s->s_shrink.shrink = prune_super; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; } out: return s; + +err_out_dentry_lru: + list_lru_destroy(&s->s_dentry_lru); err_out: security_sb_free(s); #ifdef CONFIG_SMP @@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); + put_filesystem(fs); put_super(s); } else { diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index c1a591a4725b..66bc316927e8 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -469,7 +469,7 @@ static void sysv_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) { - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); sysv_truncate(inode); } } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 7f60e900edff..6e025e02ffde 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2587,10 +2587,11 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, return -EROFS; failing = power_cut_emulated(c, lnum, 1); - if (failing) + if (failing) { len = corrupt_data(c, buf, len); - ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", - len, lnum, offs); + ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", + len, lnum, offs); + } err = ubi_leb_write(c->ubi, lnum, buf, offs, len); if (err) return err; diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d05666fed..f35135e28e96 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,18 +277,25 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct 
shrink_control *sc) { - int nr = sc->nr_to_scan; - int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); - if (nr == 0) - /* - * Due to the way UBIFS updates the clean znode counter it may - * temporarily be negative. - */ - return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; +} + +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr = sc->nr_to_scan; + int contention = 0; + unsigned long freed; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* @@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) if (!freed && contention) { dbg_tnc("freed nothing, but contention"); - return -1; + return SHRINK_STOP; } out: - dbg_tnc("%d znodes were freed, requested %d", freed, nr); + dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 879b9976c12b..3e4aa7281e04 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ static struct shrinker ubifs_shrinker_info = { - .shrink = ubifs_shrinker, + .scan_objects = ubifs_shrink_scan, + .count_objects = ubifs_shrink_count, .seeks = DEFAULT_SEEKS, }; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce4d70f..e8c8cfe1435c 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct 
shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); diff --git a/fs/udf/file.c b/fs/udf/file.c index 29569dd08168..c02a27a19c6d 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -141,7 +141,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); int err, pos; - size_t count = iocb->ki_left; + size_t count = iocb->ki_nbytes; struct udf_inode_info *iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index b6d15d349810..062b7925bca0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -172,7 +172,7 @@ static void udf_write_failed(struct address_space *mapping, loff_t to) loff_t isize = inode->i_size; if (to > isize) { - truncate_pagecache(inode, to, isize); + truncate_pagecache(inode, isize); if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index ff24e4449ece..c8ca96086784 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -531,7 +531,7 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; if (to > inode->i_size) - truncate_pagecache(inode, to, inode->i_size); + truncate_pagecache(inode, inode->i_size); } static int ufs_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 4a7286c1dc80..a02cfb9e3bce 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -27,8 +27,6 @@ /* * Greedy allocation. May fail and may return vmalloced memory. - * - * Must be freed using kmem_free_large. 
*/ void * kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) @@ -36,7 +34,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) void *ptr; size_t kmsize = maxsize; - while (!(ptr = kmem_zalloc_large(kmsize))) { + while (!(ptr = vzalloc(kmsize))) { if ((kmsize >>= 1) <= minsize) kmsize = minsize; } @@ -75,6 +73,17 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) return ptr; } +void * +kmem_zalloc_large(size_t size, xfs_km_flags_t flags) +{ + void *ptr; + + ptr = kmem_zalloc(size, flags | KM_MAYFAIL); + if (ptr) + return ptr; + return vzalloc(size); +} + void kmem_free(const void *ptr) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index b2f2620f9a87..3a7371cab508 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -57,17 +57,10 @@ kmem_flags_convert(xfs_km_flags_t flags) extern void *kmem_alloc(size_t, xfs_km_flags_t); extern void *kmem_zalloc(size_t, xfs_km_flags_t); +extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void kmem_free(const void *); -static inline void *kmem_zalloc_large(size_t size) -{ - return vzalloc(size); -} -static inline void kmem_free_large(void *ptr) -{ - vfree(ptr); -} extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 69518960b2ba..0e2f37efedd0 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type) * go out to the disk. 
*/ len = XFS_ACL_MAX_SIZE(ip->i_mount); - xfs_acl = kzalloc(len, GFP_KERNEL); + xfs_acl = kmem_zalloc_large(len, KM_SLEEP); if (!xfs_acl) return ERR_PTR(-ENOMEM); @@ -175,10 +175,10 @@ xfs_get_acl(struct inode *inode, int type) if (IS_ERR(acl)) goto out; - out_update_cache: +out_update_cache: set_cached_acl(inode, type, acl); - out: - kfree(xfs_acl); +out: + kmem_free(xfs_acl); return acl; } @@ -209,7 +209,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) struct xfs_acl *xfs_acl; int len = XFS_ACL_MAX_SIZE(ip->i_mount); - xfs_acl = kzalloc(len, GFP_KERNEL); + xfs_acl = kmem_zalloc_large(len, KM_SLEEP); if (!xfs_acl) return -ENOMEM; @@ -222,7 +222,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); - kfree(xfs_acl); + kmem_free(xfs_acl); } else { /* * A NULL ACL argument means we want to remove the ACL. diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 977da0ec6604..e51e581454e9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1582,7 +1582,7 @@ xfs_vm_write_begin( unlock_page(page); if (pos + len > i_size_read(inode)) - truncate_pagecache(inode, pos + len, i_size_read(inode)); + truncate_pagecache(inode, i_size_read(inode)); page_cache_release(page); page = NULL; @@ -1618,7 +1618,7 @@ xfs_vm_write_end( loff_t to = pos + len; if (to > isize) { - truncate_pagecache(inode, to, isize); + truncate_pagecache(inode, isize); xfs_vm_kill_delalloc_range(inode, isize, to); } } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 92b830901d60..f47e65c30be6 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4450,7 +4450,7 @@ xfs_bmapi_write( { struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; - struct xfs_bmalloca bma = { 0 }; /* args for xfs_bmap_alloc */ + struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* after the end of extents */ int error; /* 
error return */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index cf3bc76710c3..bb8de8e399c4 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -925,3 +925,47 @@ xfs_bmdr_maxrecs( return blocklen / sizeof(xfs_bmdr_rec_t); return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); } + +/* + * Change the owner of a btree format fork fo the inode passed in. Change it to + * the owner of that is passed in so that we can change owners before or after + * we switch forks between inodes. The operation that the caller is doing will + * determine whether is needs to change owner before or after the switch. + * + * For demand paged transactional modification, the fork switch should be done + * after reading in all the blocks, modifying them and pinning them in the + * transaction. For modification when the buffers are already pinned in memory, + * the fork switch can be done before changing the owner as we won't need to + * validate the owner until the btree buffers are unpinned and writes can occur + * again. + * + * For recovery based ownership change, there is no transactional context and + * so a buffer list must be supplied so that we can record the buffers that we + * modified for the caller to issue IO on. + */ +int +xfs_bmbt_change_owner( + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + xfs_ino_t new_owner, + struct list_head *buffer_list) +{ + struct xfs_btree_cur *cur; + int error; + + ASSERT(tp || buffer_list); + ASSERT(!(tp && buffer_list)); + if (whichfork == XFS_DATA_FORK) + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE); + else + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE); + + cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); + if (!cur) + return ENOMEM; + + error = xfs_btree_change_owner(cur, new_owner, buffer_list); + xfs_btree_del_cursor(cur, error ? 
XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + return error; +} diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 1b726d626941..e367461a638e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -236,6 +236,10 @@ extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, xfs_ino_t new_owner, + struct list_head *buffer_list); + extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 541d59f5e658..97f952caea74 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -612,13 +612,9 @@ xfs_getbmap( if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) return XFS_ERROR(ENOMEM); - out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); - if (!out) { - out = kmem_zalloc_large(bmv->bmv_count * - sizeof(struct getbmapx)); - if (!out) - return XFS_ERROR(ENOMEM); - } + out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); + if (!out) + return XFS_ERROR(ENOMEM); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { @@ -754,10 +750,7 @@ xfs_getbmap( break; } - if (is_vmalloc_addr(out)) - kmem_free_large(out); - else - kmem_free(out); + kmem_free(out); return error; } @@ -1789,14 +1782,6 @@ xfs_swap_extents( int taforkblks = 0; __uint64_t tmp; - /* - * We have no way of updating owner information in the BMBT blocks for - * each inode on CRC enabled filesystems, so to avoid corrupting the - * this metadata we simply don't allow extent swaps to occur. 
- */ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return XFS_ERROR(EINVAL); - tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); @@ -1920,6 +1905,42 @@ xfs_swap_extents( goto out_trans_cancel; } + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + /* + * Before we've swapped the forks, lets set the owners of the forks + * appropriately. We have to do this as we are demand paging the btree + * buffers, and so the validation done on read will expect the owner + * field to be correctly set. Once we change the owners, we can swap the + * inode forks. + * + * Note the trickiness in setting the log flags - we set the owner log + * flag on the opposite inode (i.e. the inode we are setting the new + * owner to be) because once we swap the forks and log that, log + * recovery is going to see the fork as owned by the swapped inode, + * not the pre-swapped inodes. + */ + src_log_flags = XFS_ILOG_CORE; + target_log_flags = XFS_ILOG_CORE; + if (ip->i_d.di_version == 3 && + ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { + target_log_flags |= XFS_ILOG_DOWNER; + error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, + tip->i_ino, NULL); + if (error) + goto out_trans_cancel; + } + + if (tip->i_d.di_version == 3 && + tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { + src_log_flags |= XFS_ILOG_DOWNER; + error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, + ip->i_ino, NULL); + if (error) + goto out_trans_cancel; + } + /* * Swap the data forks of the inodes */ @@ -1957,7 +1978,6 @@ xfs_swap_extents( tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; - src_log_flags = XFS_ILOG_CORE; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the @@ -1971,11 +1991,12 @@ xfs_swap_extents( src_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: + ASSERT(ip->i_d.di_version < 3 || + (src_log_flags & XFS_ILOG_DOWNER)); 
src_log_flags |= XFS_ILOG_DBROOT; break; } - target_log_flags = XFS_ILOG_CORE; switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the @@ -1990,13 +2011,11 @@ xfs_swap_extents( break; case XFS_DINODE_FMT_BTREE: target_log_flags |= XFS_ILOG_DBROOT; + ASSERT(tip->i_d.di_version < 3 || + (target_log_flags & XFS_ILOG_DOWNER)); break; } - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 7a2b4da3c0db..5690e102243d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -855,6 +855,41 @@ xfs_btree_readahead( return xfs_btree_readahead_sblock(cur, lr, block); } +STATIC xfs_daddr_t +xfs_btree_ptr_to_daddr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + + return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + } else { + ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); + ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); + + return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + be32_to_cpu(ptr->s)); + } +} + +/* + * Readahead @count btree blocks at the given @ptr location. + * + * We don't need to care about long or short form btrees here as we have a + * method of converting the ptr directly to a daddr available to us. + */ +STATIC void +xfs_btree_readahead_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + xfs_extlen_t count) +{ + xfs_buf_readahead(cur->bc_mp->m_ddev_targp, + xfs_btree_ptr_to_daddr(cur, ptr), + cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); +} + /* * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. 
@@ -1073,24 +1108,6 @@ xfs_btree_buf_to_ptr( } } -STATIC xfs_daddr_t -xfs_btree_ptr_to_daddr( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); - - return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); - } else { - ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); - - return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(ptr->s)); - } -} - STATIC void xfs_btree_set_refs( struct xfs_btree_cur *cur, @@ -3869,3 +3886,120 @@ xfs_btree_get_rec( *stat = 1; return 0; } + +/* + * Change the owner of a btree. + * + * The mechanism we use here is ordered buffer logging. Because we don't know + * how many buffers were are going to need to modify, we don't really want to + * have to make transaction reservations for the worst case of every buffer in a + * full size btree as that may be more space that we can fit in the log.... + * + * We do the btree walk in the most optimal manner possible - we have sibling + * pointers so we can just walk all the blocks on each level from left to right + * in a single pass, and then move to the next level and do the same. We can + * also do readahead on the sibling pointers to get IO moving more quickly, + * though for slow disks this is unlikely to make much difference to performance + * as the amount of CPU work we have to do before moving to the next block is + * relatively small. + * + * For each btree block that we load, modify the owner appropriately, set the + * buffer as an ordered buffer and log it appropriately. We need to ensure that + * we mark the region we change dirty so that if the buffer is relogged in + * a subsequent transaction the changes we make here as an ordered buffer are + * correctly relogged in that transaction. 
If we are in recovery context, then + * just queue the modified buffer as delayed write buffer so the transaction + * recovery completion writes the changes to disk. + */ +static int +xfs_btree_block_change_owner( + struct xfs_btree_cur *cur, + int level, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_ptr rptr; + + /* do right sibling readahead */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* modify the owner */ + block = xfs_btree_get_block(cur, level, &bp); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + block->bb_u.l.bb_owner = cpu_to_be64(new_owner); + else + block->bb_u.s.bb_owner = cpu_to_be32(new_owner); + + /* + * If the block is a root block hosted in an inode, we might not have a + * buffer pointer here and we shouldn't attempt to log the change as the + * information is already held in the inode and discarded when the root + * block is formatted into the on-disk inode fork. We still change it, + * though, so everything is consistent in memory. 
+ */ + if (bp) { + if (cur->bc_tp) { + xfs_trans_ordered_buf(cur->bc_tp, bp); + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); + } else { + xfs_buf_delwri_queue(bp, buffer_list); + } + } else { + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(level == cur->bc_nlevels - 1); + } + + /* now read rh sibling block for next iteration */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + return ENOENT; + + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); +} + +int +xfs_btree_change_owner( + struct xfs_btree_cur *cur, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + union xfs_btree_ptr lptr; + int level; + struct xfs_btree_block *block = NULL; + int error = 0; + + cur->bc_ops->init_ptr_from_cur(cur, &lptr); + + /* for each level */ + for (level = cur->bc_nlevels - 1; level >= 0; level--) { + /* grab the left hand block */ + error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); + if (error) + return error; + + /* readahead the left most block for the next level down */ + if (level > 0) { + union xfs_btree_ptr *ptr; + + ptr = xfs_btree_ptr_addr(cur, 1, block); + xfs_btree_readahead_ptr(cur, ptr, 1); + + /* save for the next iteration of the loop */ + lptr = *ptr; + } + + /* for each buffer in the level */ + do { + error = xfs_btree_block_change_owner(cur, level, + new_owner, + buffer_list); + } while (!error); + + if (error != ENOENT) + return error; + } + + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index c8473c7ef45e..06729b67ad58 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -121,15 +121,18 @@ union xfs_btree_rec { /* * For logging record fields. 
*/ -#define XFS_BB_MAGIC 0x01 -#define XFS_BB_LEVEL 0x02 -#define XFS_BB_NUMRECS 0x04 -#define XFS_BB_LEFTSIB 0x08 -#define XFS_BB_RIGHTSIB 0x10 -#define XFS_BB_BLKNO 0x20 +#define XFS_BB_MAGIC (1 << 0) +#define XFS_BB_LEVEL (1 << 1) +#define XFS_BB_NUMRECS (1 << 2) +#define XFS_BB_LEFTSIB (1 << 3) +#define XFS_BB_RIGHTSIB (1 << 4) +#define XFS_BB_BLKNO (1 << 5) +#define XFS_BB_LSN (1 << 6) +#define XFS_BB_UUID (1 << 7) +#define XFS_BB_OWNER (1 << 8) #define XFS_BB_NUM_BITS 5 #define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) -#define XFS_BB_NUM_BITS_CRC 8 +#define XFS_BB_NUM_BITS_CRC 9 #define XFS_BB_ALL_BITS_CRC ((1 << XFS_BB_NUM_BITS_CRC) - 1) /* @@ -442,6 +445,8 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); +int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner, + struct list_head *buffer_list); /* * btree block CRC helpers diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c06823fe10d3..263470075ea2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -81,54 +81,6 @@ xfs_buf_vmap_len( } /* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. - */ -STATIC void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; - bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. 
it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unnecessary round trip on the - * bt_lru_lock. - */ -STATIC void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* * When we mark a buffer stale, we remove the buffer from the LRU and clear the * b_lru_ref count so that the buffer is freed immediately when the buffer * reference count falls to zero. If the buffer is already on the LRU, we need @@ -151,20 +103,14 @@ xfs_buf_stale( */ bp->b_flags &= ~_XBF_DELWRI_Q; - atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; + spin_lock(&bp->b_lock); + atomic_set(&bp->b_lru_ref, 0); + if (!(bp->b_state & XFS_BSTATE_DISPOSE) && + (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) + atomic_dec(&bp->b_hold); - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru) && - !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } ASSERT(atomic_read(&bp->b_hold) >= 1); + spin_unlock(&bp->b_lock); } static int @@ -228,6 +174,7 @@ _xfs_buf_alloc( INIT_LIST_HEAD(&bp->b_list); RB_CLEAR_NODE(&bp->b_rbnode); sema_init(&bp->b_sema, 0); /* held, no waiters */ + spin_lock_init(&bp->b_lock); XB_SET_OWNER(bp); bp->b_target = target; bp->b_flags = flags; @@ -917,12 +864,33 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); + spin_lock(&bp->b_lock); + if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { + /* + * If the buffer is added to the LRU take a new + * 
reference to the buffer for the LRU and clear the + * (now stale) dispose list state flag + */ + if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) { + bp->b_state &= ~XFS_BSTATE_DISPOSE; + atomic_inc(&bp->b_hold); + } + spin_unlock(&bp->b_lock); spin_unlock(&pag->pag_buf_lock); } else { - xfs_buf_lru_del(bp); + /* + * most of the time buffers will already be removed from + * the LRU, so optimise that case by checking for the + * XFS_BSTATE_DISPOSE flag indicating the last list the + * buffer was on was the disposal list + */ + if (!(bp->b_state & XFS_BSTATE_DISPOSE)) { + list_lru_del(&bp->b_target->bt_lru, &bp->b_lru); + } else { + ASSERT(list_empty(&bp->b_lru)); + } + spin_unlock(&bp->b_lock); + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); spin_unlock(&pag->pag_buf_lock); @@ -1502,83 +1470,121 @@ xfs_buf_iomove( * returned. These buffers will have an elevated hold count, so wait on those * while freeing all the buffers only held by the LRU. */ +static enum lru_status +xfs_buftarg_wait_rele( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) + +{ + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; + + if (atomic_read(&bp->b_hold) > 1) { + /* need to wait, so skip it this pass */ + trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + return LRU_SKIP; + } + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; + + /* + * clear the LRU reference count so the buffer doesn't get + * ignored in xfs_buf_rele(). 
+ */ + atomic_set(&bp->b_lru_ref, 0); + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(item, dispose); + spin_unlock(&bp->b_lock); + return LRU_REMOVED; +} + void xfs_wait_buftarg( struct xfs_buftarg *btp) { - struct xfs_buf *bp; + LIST_HEAD(dispose); + int loop = 0; -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - trace_xfs_buf_wait_buftarg(bp, _RET_IP_); - list_move_tail(&bp->b_lru, &btp->bt_lru); - spin_unlock(&btp->bt_lru_lock); - delay(100); - goto restart; + /* loop until there is nothing left on the lru list. */ + while (list_lru_count(&btp->bt_lru)) { + list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, + &dispose, LONG_MAX); + + while (!list_empty(&dispose)) { + struct xfs_buf *bp; + bp = list_first_entry(&dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); } - /* - * clear the LRU reference count so the buffer doesn't get - * ignored in xfs_buf_rele(). - */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); - xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); + if (loop++ != 0) + delay(100); } - spin_unlock(&btp->bt_lru_lock); } -int -xfs_buftarg_shrink( +static enum lru_status +xfs_buftarg_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); + struct list_head *dispose = arg; + + /* + * we are inverting the lru lock/bp->b_lock here, so use a trylock. + * If we fail to get the lock, just skip it. + */ + if (!spin_trylock(&bp->b_lock)) + return LRU_SKIP; + /* + * Decrement the b_lru_ref count unless the value is already + * zero. If the value is already zero, we need to reclaim the + * buffer, otherwise it gets another trip through the LRU. 
+ */ + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + spin_unlock(&bp->b_lock); + return LRU_ROTATE; + } + + bp->b_state |= XFS_BSTATE_DISPOSE; + list_move(item, dispose); + spin_unlock(&bp->b_lock); + return LRU_REMOVED; +} + +static unsigned long +xfs_buftarg_shrink_scan( struct shrinker *shrink, struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); + unsigned long freed; + unsigned long nr_to_scan = sc->nr_to_scan; - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). 
- */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - bp->b_lru_flags |= _XBF_LRU_DISPOSE; - } - spin_unlock(&btp->bt_lru_lock); + freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate, + &dispose, &nr_to_scan); while (!list_empty(&dispose)) { + struct xfs_buf *bp; bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); xfs_buf_rele(bp); } - return btp->bt_lru_nr; + return freed; +} + +static unsigned long +xfs_buftarg_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_buftarg *btp = container_of(shrink, + struct xfs_buftarg, bt_shrinker); + return list_lru_count_node(&btp->bt_lru, sc->nid); } void @@ -1587,6 +1593,7 @@ xfs_free_buftarg( struct xfs_buftarg *btp) { unregister_shrinker(&btp->bt_shrinker); + list_lru_destroy(&btp->bt_lru); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); @@ -1660,12 +1667,16 @@ xfs_alloc_buftarg( if (!btp->bt_bdi) goto error; - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; + + if (list_lru_init(&btp->bt_lru)) + goto error; + + btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; + btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; + btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&btp->bt_shrinker); return btp; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 433a12ed7b17..e65683361017 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/uio.h> +#include <linux/list_lru.h> /* * Base types @@ -59,7 +60,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ 
typedef unsigned int xfs_buf_flags_t; @@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } + { _XBF_COMPOUND, "COMPOUND" } + +/* + * Internal state flags. + */ +#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ typedef struct xfs_buftarg { dev_t bt_dev; @@ -92,9 +96,7 @@ typedef struct xfs_buftarg { /* LRU control structures */ struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; + struct list_lru bt_lru; } xfs_buftarg_t; struct xfs_buf; @@ -137,7 +139,8 @@ typedef struct xfs_buf { * bt_lru_lock and not by b_sema */ struct list_head b_lru; /* lru list */ - xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ + spinlock_t b_lock; /* internal state lock */ + unsigned int b_state; /* internal state flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3a944b198e35..88c5ea75ebf6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -613,13 +613,27 @@ xfs_buf_item_unlock( } } } - if (clean || aborted) { - if (atomic_dec_and_test(&bip->bli_refcount)) { - ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp)); + + /* + * Clean buffers, by definition, cannot be in the AIL. However, aborted + * buffers may be dirty and hence in the AIL. Therefore if we are + * aborting a buffer and we've just taken the last refernce away, we + * have to check if it is in the AIL before freeing it. We need to free + * it in this case, because an aborted transaction has already shut the + * filesystem down and this is the last chance we will have to do so. 
+ */ + if (atomic_dec_and_test(&bip->bli_refcount)) { + if (clean) + xfs_buf_item_relse(bp); + else if (aborted) { + ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); + if (lip->li_flags & XFS_LI_IN_AIL) { + xfs_trans_ail_delete(lip->li_ailp, lip, + SHUTDOWN_LOG_IO_ERROR); + } xfs_buf_item_relse(bp); } - } else - atomic_dec(&bip->bli_refcount); + } if (!(flags & XFS_BLI_HOLD)) xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index d4e59a4ff59f..069537c845e5 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -635,6 +635,7 @@ xfs_da3_root_split( xfs_trans_log_buf(tp, bp, 0, size - 1); bp->b_ops = blk1->bp->b_ops; + xfs_trans_buf_copy_type(bp, blk1->bp); blk1->bp = bp; blk1->blkno = blkno; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 08984eeee159..1021c8356d08 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -180,6 +180,11 @@ xfs_dir3_leaf_check_int( return true; } +/* + * We verify the magic numbers before decoding the leaf header so that on debug + * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due + * to incorrect magic numbers. + */ static bool xfs_dir3_leaf_verify( struct xfs_buf *bp, @@ -191,24 +196,25 @@ xfs_dir3_leaf_verify( ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); - xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; + __uint16_t magic3; - if ((magic == XFS_DIR2_LEAF1_MAGIC && - leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) || - (magic == XFS_DIR2_LEAFN_MAGIC && - leafhdr.magic != XFS_DIR3_LEAFN_MAGIC)) - return false; + magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? 
XFS_DIR3_LEAF1_MAGIC + : XFS_DIR3_LEAFN_MAGIC; + if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) + return false; if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid)) return false; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) return false; } else { - if (leafhdr.magic != magic) + if (leaf->hdr.info.magic != cpu_to_be16(magic)) return false; } + + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 251c66632e5e..71520e6e5d65 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -940,13 +940,8 @@ xfs_qm_dqput_final( trace_xfs_dqput_free(dqp); - mutex_lock(&qi->qi_lru_lock); - if (list_empty(&dqp->q_lru)) { - list_add_tail(&dqp->q_lru, &qi->qi_lru_list); - qi->qi_lru_count++; + if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) XFS_STATS_INC(xs_qm_dquot_unused); - } - mutex_unlock(&qi->qi_lru_lock); /* * If we just added a udquot to the freelist, then we want to release diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 60c6e1f12695..e838d84b4e85 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -142,7 +142,8 @@ xfs_qm_dqunpin_wait( STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, - struct list_head *buffer_list) + struct list_head *buffer_list) __releases(&lip->li_ailp->xa_lock) + __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; struct xfs_buf *bp = NULL; diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 86f559f6e5d3..e43708e2f080 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -160,7 +160,8 @@ xfs_extent_busy_update_extent( struct xfs_extent_busy *busyp, xfs_agblock_t fbno, xfs_extlen_t flen, - bool userdata) + bool userdata) __releases(&pag->pagb_lock) + __acquires(&pag->pagb_lock) { xfs_agblock_t fend = fbno + flen; xfs_agblock_t bbno = busyp->bno; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 
16219b9c6790..193206ba4358 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -48,7 +48,7 @@ STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, /* * Allocate and initialise an xfs_inode. */ -STATIC struct xfs_inode * +struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) @@ -98,7 +98,7 @@ xfs_inode_free_callback( kmem_zone_free(xfs_inode_zone, ip); } -STATIC void +void xfs_inode_free( struct xfs_inode *ip) { @@ -1167,7 +1167,7 @@ xfs_reclaim_inodes( * them to be cleaned, which we hope will not be very long due to the * background walker having already kicked the IO off on those dirty inodes. */ -void +long xfs_reclaim_inodes_nr( struct xfs_mount *mp, int nr_to_scan) @@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr( xfs_reclaim_work_queue(mp); xfs_ail_push_all(mp->m_ail); - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); + return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); } /* diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a89f7d791bd..9ed68bb750f5 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -42,11 +42,15 @@ struct xfs_eofblocks { int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); +/* recovery needs direct inode allocation capability */ +struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino); +void xfs_inode_free(struct xfs_inode *ip); + void xfs_reclaim_worker(struct work_struct *work); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); int xfs_reclaim_inodes_count(struct xfs_mount *mp); -void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); +long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index e011d597f12f..63382d37f565 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -53,9 +53,8 @@ xfs_inobp_check( i 
* mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { xfs_alert(mp, - "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", - bp); - ASSERT(dip->di_next_unlinked); + "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", + i, (long long)bp->b_bn); } } } @@ -106,11 +105,10 @@ xfs_inode_buf_verify( XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, mp, dip); #ifdef DEBUG - xfs_emerg(mp, + xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)bp->b_bn, i, be16_to_cpu(dip->di_magic)); - ASSERT(0); #endif } } @@ -196,7 +194,7 @@ xfs_imap_to_bp( return 0; } -STATIC void +void xfs_dinode_from_disk( xfs_icdinode_t *to, xfs_dinode_t *from) diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h index 599e6c0ca2a9..abba0ae8cf2d 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/xfs_inode_buf.h @@ -32,17 +32,17 @@ struct xfs_imap { ushort im_boffset; /* inode offset in block in bytes */ }; -int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, - struct xfs_imap *, struct xfs_dinode **, - struct xfs_buf **, uint, uint); -int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, uint); -void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); -void xfs_dinode_to_disk(struct xfs_dinode *, - struct xfs_icdinode *); +int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, + struct xfs_imap *, struct xfs_dinode **, + struct xfs_buf **, uint, uint); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, uint); +void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); +void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from); +void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from); #if defined(DEBUG) -void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); +void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ diff --git a/fs/xfs/xfs_ioctl.c 
b/fs/xfs/xfs_ioctl.c index bdebc21078d7..668e8f4ccf5e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -71,7 +71,7 @@ xfs_find_handle( int hsize; xfs_handle_t handle; struct inode *inode; - struct fd f = {0}; + struct fd f = {NULL}; struct path path; int error; struct xfs_inode *ip; @@ -456,12 +456,9 @@ xfs_attrlist_by_handle( if (IS_ERR(dentry)) return PTR_ERR(dentry); - kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL); - if (!kbuf) { - kbuf = kmem_zalloc_large(al_hreq.buflen); - if (!kbuf) - goto out_dput; - } + kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP); + if (!kbuf) + goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, @@ -472,12 +469,9 @@ xfs_attrlist_by_handle( if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) error = -EFAULT; - out_kfree: - if (is_vmalloc_addr(kbuf)) - kmem_free_large(kbuf); - else - kmem_free(kbuf); - out_dput: +out_kfree: + kmem_free(kbuf); +out_dput: dput(dentry); return error; } @@ -495,12 +489,9 @@ xfs_attrmulti_attr_get( if (*len > XATTR_SIZE_MAX) return EINVAL; - kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL); - if (!kbuf) { - kbuf = kmem_zalloc_large(*len); - if (!kbuf) - return ENOMEM; - } + kbuf = kmem_zalloc_large(*len, KM_SLEEP); + if (!kbuf) + return ENOMEM; error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); if (error) @@ -509,11 +500,8 @@ xfs_attrmulti_attr_get( if (copy_to_user(ubuf, kbuf, *len)) error = EFAULT; - out_kfree: - if (is_vmalloc_addr(kbuf)) - kmem_free_large(kbuf); - else - kmem_free(kbuf); +out_kfree: + kmem_free(kbuf); return error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index d3ab9534307f..f671f7e472ac 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -371,12 +371,9 @@ xfs_compat_attrlist_by_handle( return PTR_ERR(dentry); error = -ENOMEM; - kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL); - if (!kbuf) { - kbuf = 
kmem_zalloc_large(al_hreq.buflen); - if (!kbuf) - goto out_dput; - } + kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP); + if (!kbuf) + goto out_dput; cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, @@ -387,12 +384,9 @@ xfs_compat_attrlist_by_handle( if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) error = -EFAULT; - out_kfree: - if (is_vmalloc_addr(kbuf)) - kmem_free_large(kbuf); - else - kmem_free(kbuf); - out_dput: +out_kfree: + kmem_free(kbuf); +out_dput: dput(dentry); return error; } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b93e14b86754..084b3e1741fd 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -495,7 +495,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free_large(irbuf); + kmem_free(irbuf); *ubcountp = ubelem; /* * Found some inodes, return them now and return the error next time. @@ -541,8 +541,9 @@ xfs_bulkstat_single( * at the expense of the error case. 
*/ - ino = (xfs_ino_t)*lastinop; - error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res); + ino = *lastinop; + error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), + NULL, &res); if (error) { /* * Special case way failed, do it the "long" way diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 5372d58ef93a..a2dea108071a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -257,7 +257,8 @@ xlog_grant_head_wait( struct xlog *log, struct xlog_grant_head *head, struct xlog_ticket *tic, - int need_bytes) + int need_bytes) __releases(&head->lock) + __acquires(&head->lock) { list_add_tail(&tic->t_queue, &head->waiters); diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h index 31e3a06c4644..ca7e28a8ed31 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/xfs_log_format.h @@ -474,6 +474,8 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_ADATA 0x040 /* log i_af.if_data */ #define XFS_ILOG_AEXT 0x080 /* log i_af.if_extents */ #define XFS_ILOG_ABROOT 0x100 /* log i_af.i_broot */ +#define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */ +#define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */ /* @@ -487,7 +489,8 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ XFS_ILOG_UUID | XFS_ILOG_ADATA | \ - XFS_ILOG_AEXT | XFS_ILOG_ABROOT) + XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \ + XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) #define XFS_ILOG_DFORK (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT) @@ -499,7 +502,8 @@ typedef struct xfs_inode_log_format_64 { XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \ XFS_ILOG_DEV | XFS_ILOG_UUID | \ XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ - XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP) + XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \ + XFS_ILOG_DOWNER | XFS_ILOG_AOWNER) static inline int xfs_ilog_fbroot(int w) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7c0c1fdc728b..dabda9521b4b 100644 
--- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2014,7 +2014,7 @@ xlog_recover_get_buf_lsn( case XFS_ATTR3_RMT_MAGIC: return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); case XFS_SB_MAGIC: - return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn); + return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); default: break; } @@ -2629,6 +2629,82 @@ out_release: return error; } +/* + * Inode fork owner changes + * + * If we have been told that we have to reparent the inode fork, it's because an + * extent swap operation on a CRC enabled filesystem has been done and we are + * replaying it. We need to walk the BMBT of the appropriate fork and change the + * owners of it. + * + * The complexity here is that we don't have an inode context to work with, so + * after we've replayed the inode we need to instantiate one. This is where the + * fun begins. + * + * We are in the middle of log recovery, so we can't run transactions. That + * means we cannot use cache coherent inode instantiation via xfs_iget(), as + * that will result in the corresponding iput() running the inode through + * xfs_inactive(). If we've just replayed an inode core that changes the link + * count to zero (i.e. it's been unlinked), then xfs_inactive() will run + * transactions (bad!). + * + * So, to avoid this, we instantiate an inode directly from the inode core we've + * just recovered. We have the buffer still locked, and all we really need to + * instantiate is the inode core and the forks being modified. We can do this + * manually, then run the inode btree owner change, and then tear down the + * xfs_inode without having to run any transactions at all. + * + * Also, because we don't have a transaction context available here but need to + * gather all the buffers we modify for writeback so we pass the buffer_list + * instead for the operation to use. 
+ */ + +STATIC int +xfs_recover_inode_owner_change( + struct xfs_mount *mp, + struct xfs_dinode *dip, + struct xfs_inode_log_format *in_f, + struct list_head *buffer_list) +{ + struct xfs_inode *ip; + int error; + + ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); + + ip = xfs_inode_alloc(mp, in_f->ilf_ino); + if (!ip) + return ENOMEM; + + /* instantiate the inode */ + xfs_dinode_from_disk(&ip->i_d, dip); + ASSERT(ip->i_d.di_version >= 3); + + error = xfs_iformat_fork(ip, dip); + if (error) + goto out_free_ip; + + + if (in_f->ilf_fields & XFS_ILOG_DOWNER) { + ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); + error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, + ip->i_ino, buffer_list); + if (error) + goto out_free_ip; + } + + if (in_f->ilf_fields & XFS_ILOG_AOWNER) { + ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); + error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, + ip->i_ino, buffer_list); + if (error) + goto out_free_ip; + } + +out_free_ip: + xfs_inode_free(ip); + return error; +} + STATIC int xlog_recover_inode_pass2( struct xlog *log, @@ -2681,8 +2757,7 @@ xlog_recover_inode_pass2( error = bp->b_error; if (error) { xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); - xfs_buf_relse(bp); - goto error; + goto out_release; } ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); @@ -2692,30 +2767,31 @@ xlog_recover_inode_pass2( * like an inode! 
*/ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; - goto error; + goto out_release; } dicp = item->ri_buf[1].i_addr; if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; - goto error; + goto out_release; } /* * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. + * than the lsn of the transaction we are replaying. Note: we still + * need to replay an owner change even though the inode is more recent + * than the transaction as there is no guarantee that all the btree + * blocks are more recent than this transaction, too. 
*/ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); @@ -2723,7 +2799,7 @@ xlog_recover_inode_pass2( if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; - goto out_release; + goto out_owner_change; } } @@ -2745,10 +2821,9 @@ xlog_recover_inode_pass2( dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { /* do nothing */ } else { - xfs_buf_relse(bp); trace_xfs_log_recover_inode_skip(log, in_f); error = 0; - goto error; + goto out_release; } } @@ -2760,13 +2835,12 @@ xlog_recover_inode_pass2( (dicp->di_format != XFS_DINODE_FMT_BTREE)) { XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", XFS_ERRLEVEL_LOW, mp, dicp); - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad regular inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = EFSCORRUPTED; - goto error; + goto out_release; } } else if (unlikely(S_ISDIR(dicp->di_mode))) { if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && @@ -2774,19 +2848,17 @@ xlog_recover_inode_pass2( (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", XFS_ERRLEVEL_LOW, mp, dicp); - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad dir inode log record, rec ptr 0x%p, " "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", __func__, item, dip, bp, in_f->ilf_ino); error = EFSCORRUPTED; - goto error; + goto out_release; } } if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, dicp); - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", @@ -2794,29 +2866,27 @@ xlog_recover_inode_pass2( dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); error = EFSCORRUPTED; - goto error; + goto out_release; } if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", XFS_ERRLEVEL_LOW, mp, dicp); - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); error = EFSCORRUPTED; - goto error; + goto out_release; } isize = xfs_icdinode_size(dicp->di_version); if (unlikely(item->ri_buf[1].i_len > isize)) { XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, dicp); - xfs_buf_relse(bp); xfs_alert(mp, "%s: Bad inode log record length %d, rec ptr 0x%p", __func__, item->ri_buf[1].i_len, item); error = EFSCORRUPTED; - goto error; + goto out_release; } /* The core is in in-core format */ @@ -2842,7 +2912,7 @@ xlog_recover_inode_pass2( } if (in_f->ilf_size == 2) - goto write_inode_buffer; + goto out_owner_change; len = item->ri_buf[2].i_len; src = item->ri_buf[2].i_addr; ASSERT(in_f->ilf_size <= 4); @@ -2903,13 +2973,15 @@ xlog_recover_inode_pass2( default: xfs_warn(log->l_mp, "%s: Invalid flag", __func__); ASSERT(0); - xfs_buf_relse(bp); error = EIO; - goto error; + goto out_release; } } -write_inode_buffer: +out_owner_change: + if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) + error = xfs_recover_inode_owner_change(mp, dip, in_f, + buffer_list); /* re-generate the checksum. 
*/ xfs_dinode_calc_crc(log->l_mp, dip); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6218a0aeeeea..3e6c2e6c9cd2 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -51,8 +51,9 @@ */ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); + +STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it * currently is the only interface into the radix tree code that allows @@ -203,12 +204,9 @@ xfs_qm_dqpurge( * We move dquots to the freelist as soon as their reference count * hits zero, so it really should be on the freelist here. */ - mutex_lock(&qi->qi_lru_lock); ASSERT(!list_empty(&dqp->q_lru)); - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; + list_lru_del(&qi->qi_lru, &dqp->q_lru); XFS_STATS_DEC(xs_qm_dquot_unused); - mutex_unlock(&qi->qi_lru_lock); xfs_qm_dqdestroy(dqp); @@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk( return ndquots; } +struct xfs_qm_isolate { + struct list_head buffers; + struct list_head dispose; +}; + +static enum lru_status +xfs_qm_dquot_isolate( + struct list_head *item, + spinlock_t *lru_lock, + void *arg) +{ + struct xfs_dquot *dqp = container_of(item, + struct xfs_dquot, q_lru); + struct xfs_qm_isolate *isol = arg; + + if (!xfs_dqlock_nowait(dqp)) + goto out_miss_busy; + + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); + XFS_STATS_INC(xs_qm_dqwants); + + trace_xfs_dqreclaim_want(dqp); + list_del_init(&dqp->q_lru); + XFS_STATS_DEC(xs_qm_dquot_unused); + return LRU_REMOVED; + } + + /* + * If the dquot is dirty, flush it. If it's already being flushed, just + * skip it so there is time for the IO to complete before we try to + * reclaim it again on the next LRU pass. 
+ */ + if (!xfs_dqflock_nowait(dqp)) { + xfs_dqunlock(dqp); + goto out_miss_busy; + } + + if (XFS_DQ_IS_DIRTY(dqp)) { + struct xfs_buf *bp = NULL; + int error; + + trace_xfs_dqreclaim_dirty(dqp); + + /* we have to drop the LRU lock to flush the dquot */ + spin_unlock(lru_lock); + + error = xfs_qm_dqflush(dqp, &bp); + if (error) { + xfs_warn(dqp->q_mount, "%s: dquot %p flush failed", + __func__, dqp); + goto out_unlock_dirty; + } + + xfs_buf_delwri_queue(bp, &isol->buffers); + xfs_buf_relse(bp); + goto out_unlock_dirty; + } + xfs_dqfunlock(dqp); + + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); + + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_lru, &isol->dispose); + XFS_STATS_DEC(xs_qm_dquot_unused); + trace_xfs_dqreclaim_done(dqp); + XFS_STATS_INC(xs_qm_dqreclaims); + return LRU_REMOVED; + +out_miss_busy: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + return LRU_SKIP; + +out_unlock_dirty: + trace_xfs_dqreclaim_busy(dqp); + XFS_STATS_INC(xs_qm_dqreclaim_misses); + xfs_dqunlock(dqp); + spin_lock(lru_lock); + return LRU_RETRY; +} + +static unsigned long +xfs_qm_shrink_scan( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + struct xfs_qm_isolate isol; + unsigned long freed; + int error; + unsigned long nr_to_scan = sc->nr_to_scan; + + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) + return 0; + + INIT_LIST_HEAD(&isol.buffers); + INIT_LIST_HEAD(&isol.dispose); + + freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol, + &nr_to_scan); + + error = xfs_buf_delwri_submit(&isol.buffers); + if (error) + xfs_warn(NULL, "%s: dquot reclaim failed", __func__); + + while (!list_empty(&isol.dispose)) { + struct xfs_dquot *dqp; + + dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru); + list_del_init(&dqp->q_lru); 
+ xfs_qm_dqfree_one(dqp); + } + + return freed; +} + +static unsigned long +xfs_qm_shrink_count( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_quotainfo *qi = container_of(shrink, + struct xfs_quotainfo, qi_shrinker); + + return list_lru_count_node(&qi->qi_lru, sc->nid); +} + /* * This initializes all the quota information that's kept in the * mount structure @@ -696,11 +831,18 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + if ((error = list_lru_init(&qinf->qi_lru))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ if ((error = xfs_qm_init_quotainos(mp))) { + list_lru_destroy(&qinf->qi_lru); kmem_free(qinf); mp->m_quotainfo = NULL; return error; @@ -711,10 +853,6 @@ xfs_qm_init_quotainfo( INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); mutex_init(&qinf->qi_tree_lock); - INIT_LIST_HEAD(&qinf->qi_lru_list); - qinf->qi_lru_count = 0; - mutex_init(&qinf->qi_lru_lock); - /* mutex used to serialize quotaoffs */ mutex_init(&qinf->qi_quotaofflock); @@ -779,8 +917,10 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } - qinf->qi_shrinker.shrink = xfs_qm_shake; + qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; + qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; + qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; } @@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo( ASSERT(qi != NULL); unregister_shrinker(&qi->qi_shrinker); + list_lru_destroy(&qi->qi_lru); if (qi->qi_uquotaip) { IRELE(qi->qi_uquotaip); @@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one( xfs_qm_dqdestroy(dqp); } -STATIC void -xfs_qm_dqreclaim_one( - struct xfs_dquot *dqp, - struct list_head *buffer_list, - struct list_head *dispose_list) -{ - struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi 
= mp->m_quotainfo; - int error; - - if (!xfs_dqlock_nowait(dqp)) - goto out_move_tail; - - /* - * This dquot has acquired a reference in the meantime remove it from - * the freelist and try again. - */ - if (dqp->q_nrefs) { - xfs_dqunlock(dqp); - - trace_xfs_dqreclaim_want(dqp); - XFS_STATS_INC(xs_qm_dqwants); - - list_del_init(&dqp->q_lru); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - return; - } - - /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto out_unlock_move_tail; - - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - - trace_xfs_dqreclaim_dirty(dqp); - - error = xfs_qm_dqflush(dqp, &bp); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - goto out_unlock_move_tail; - } - - xfs_buf_delwri_queue(bp, buffer_list); - xfs_buf_relse(bp); - /* - * Give the dquot another try on the freelist, as the - * flushing will take some time. - */ - goto out_unlock_move_tail; - } - xfs_dqfunlock(dqp); - - /* - * Prevent lookups now that we are past the point of no return. - */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - ASSERT(dqp->q_nrefs == 0); - list_move_tail(&dqp->q_lru, dispose_list); - qi->qi_lru_count--; - XFS_STATS_DEC(xs_qm_dquot_unused); - - trace_xfs_dqreclaim_done(dqp); - XFS_STATS_INC(xs_qm_dqreclaims); - return; - - /* - * Move the dquot to the tail of the list so that we don't spin on it. 
- */ -out_unlock_move_tail: - xfs_dqunlock(dqp); -out_move_tail: - list_move_tail(&dqp->q_lru, &qi->qi_lru_list); - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(xs_qm_dqreclaim_misses); -} - -STATIC int -xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) -{ - struct xfs_quotainfo *qi = - container_of(shrink, struct xfs_quotainfo, qi_shrinker); - int nr_to_scan = sc->nr_to_scan; - LIST_HEAD (buffer_list); - LIST_HEAD (dispose_list); - struct xfs_dquot *dqp; - int error; - - if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) - return 0; - if (!nr_to_scan) - goto out; - - mutex_lock(&qi->qi_lru_lock); - while (!list_empty(&qi->qi_lru_list)) { - if (nr_to_scan-- <= 0) - break; - dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot, - q_lru); - xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list); - } - mutex_unlock(&qi->qi_lru_lock); - - error = xfs_buf_delwri_submit(&buffer_list); - if (error) - xfs_warn(NULL, "%s: dquot reclaim failed", __func__); - - while (!list_empty(&dispose_list)) { - dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru); - list_del_init(&dqp->q_lru); - xfs_qm_dqfree_one(dqp); - } - -out: - return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure; -} - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. 
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 670cd4464070..2b602df9c242 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -49,9 +49,7 @@ typedef struct xfs_quotainfo { struct xfs_inode *qi_uquotaip; /* user quota inode */ struct xfs_inode *qi_gquotaip; /* group quota inode */ struct xfs_inode *qi_pquotaip; /* project quota inode */ - struct list_head qi_lru_list; - struct mutex qi_lru_lock; - int qi_lru_count; + struct list_lru qi_lru; int qi_dquots; time_t qi_btimelimit; /* limit for blks timer */ time_t qi_itimelimit; /* limit for inodes timer */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 979a77d4b87d..15188cc99449 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1535,19 +1535,21 @@ xfs_fs_mount( return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } -static int +static long xfs_fs_nr_cached_objects( - struct super_block *sb) + struct super_block *sb, + int nid) { return xfs_reclaim_inodes_count(XFS_M(sb)); } -static void +static long xfs_fs_free_cached_objects( struct super_block *sb, - int nr_to_scan) + long nr_to_scan, + int nid) { - xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); + return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); } static const struct super_operations xfs_super_operations = { diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 2f2a7c005be2..f622a97a7e33 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -41,6 +41,7 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_symlink.h" +#include "xfs_buf_item.h" /* ----- Kernel only functions below ----- */ STATIC int @@ -363,6 +364,7 @@ xfs_symlink( pathlen -= byte_cnt; offset += byte_cnt; + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) - (char *)bp->b_addr); } diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index fd54a14a7c2a..3d79e513c0b3 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -12,11 +12,14 @@ 
{0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x1312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x3151, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ diff --git a/include/linux/aio.h b/include/linux/aio.h index 1bdf965339f9..d9c92daa3944 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -27,15 +27,13 @@ struct kiocb; */ #define KIOCB_CANCELLED ((void *) (~0ULL)) -typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *); +typedef int (kiocb_cancel_fn)(struct kiocb *); struct kiocb { - atomic_t ki_users; - struct file *ki_filp; struct kioctx *ki_ctx; /* NULL for sync ops */ kiocb_cancel_fn *ki_cancel; - void (*ki_dtor)(struct kiocb *); + void *private; union { void __user *user; @@ -44,17 +42,7 @@ struct kiocb { __u64 ki_user_data; /* user's data for completion */ loff_t 
ki_pos; - - void *private; - /* State that we remember to be able to restart/retry */ - unsigned short ki_opcode; - size_t ki_nbytes; /* copy of iocb->aio_nbytes */ - char __user *ki_buf; /* remaining iocb->aio_buf */ - size_t ki_left; /* remaining bytes */ - struct iovec ki_inline_vec; /* inline vector */ - struct iovec *ki_iovec; - unsigned long ki_nr_segs; - unsigned long ki_cur_seg; + size_t ki_nbytes; /* copy of iocb->aio_nbytes */ struct list_head ki_list; /* the aio core uses this * for cancellation */ @@ -74,7 +62,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { - .ki_users = ATOMIC_INIT(1), .ki_ctx = NULL, .ki_filp = filp, .ki_obj.tsk = current, @@ -84,7 +71,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) /* prototypes */ #ifdef CONFIG_AIO extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); -extern void aio_put_req(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -93,7 +79,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } -static inline void aio_put_req(struct kiocb *iocb) { } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h index 8013a45242fe..cf573c22b81e 100644 --- a/include/linux/anon_inodes.h +++ b/include/linux/anon_inodes.h @@ -13,6 +13,9 @@ struct file_operations; struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags); +struct file *anon_inode_getfile_private(const char *name, + const struct file_operations *fops, + void 
*priv, int flags); int anon_inode_getfd(const char *name, const struct file_operations *fops, void *priv, int flags); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ce6df39f60ff..8f47625a0661 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -335,6 +335,8 @@ extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); extern void ceph_osdc_sync(struct ceph_osd_client *osdc); +extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); + extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_file_layout *layout, diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index 1739510d8994..bdd18caa6c94 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -52,8 +52,6 @@ static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu) return rmap->obj[rmap->near[cpu].index]; } -#ifdef CONFIG_GENERIC_HARDIRQS - /** * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs * @size: Number of objects to be mapped @@ -68,5 +66,4 @@ extern void free_irq_cpu_rmap(struct cpu_rmap *rmap); extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq); -#endif #endif /* __LINUX_CPU_RMAP_H */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d568f3975eeb..fcabc42d66ab 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -85,7 +85,6 @@ struct cpufreq_policy { struct list_head policy_list; struct kobject kobj; struct completion kobj_unregister; - int transition_ongoing; /* Tracks transition status */ }; /* Only for ACPI */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index feaa8d88eef7..59066e0b4ff1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -55,11 +55,11 @@ struct qstr { #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) struct dentry_stat_t { - int nr_dentry; - int nr_unused; - int age_limit; /* age 
in seconds */ - int want_pages; /* pages requested by system */ - int dummy[2]; + long nr_dentry; + long nr_unused; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long dummy[2]; }; extern struct dentry_stat_t dentry_stat; @@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry) extern int sysctl_vfs_cache_pressure; +static inline unsigned long vfs_pressure_ratio(unsigned long val) +{ + return mult_frac(val, sysctl_vfs_cache_pressure, 100); +} #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 529d8711baba..3f40547ba191 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -10,6 +10,7 @@ #include <linux/stat.h> #include <linux/cache.h> #include <linux/list.h> +#include <linux/list_lru.h> #include <linux/llist.h> #include <linux/radix-tree.h> #include <linux/rbtree.h> @@ -1269,15 +1270,6 @@ struct super_block { struct list_head s_files; #endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ - struct list_head s_dentry_lru; /* unused dentry lru */ - int s_nr_dentry_unused; /* # of dentry on lru */ - - /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ - spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; - struct list_head s_inode_lru; /* unused inode lru */ - int s_nr_inodes_unused; /* # of inodes on lru */ - struct block_device *s_bdev; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; @@ -1331,11 +1323,14 @@ struct super_block { /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; -}; -/* superblock cache pruning functions */ -extern void prune_icache_sb(struct super_block *sb, int nr_to_scan); -extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan); + /* + * Keep the lru lists last in the structure so they always sit on their + * own individual cachelines. 
+ */ + struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; + struct list_lru s_inode_lru ____cacheline_aligned_in_smp; +}; extern struct timespec current_fs_time(struct super_block *sb); @@ -1629,8 +1624,8 @@ struct super_operations { ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); - int (*nr_cached_objects)(struct super_block *); - void (*free_cached_objects)(struct super_block *, int); + long (*nr_cached_objects)(struct super_block *, int); + long (*free_cached_objects)(struct super_block *, long, int); }; /* @@ -2074,6 +2069,7 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); +extern int sb_is_blkdev_sb(struct super_block *sb); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -2093,6 +2089,11 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) { } + +static inline int sb_is_blkdev_sb(struct super_block *sb) +{ + return 0; +} #endif extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; @@ -2494,7 +2495,6 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) extern int vfs_readlink(struct dentry *, char __user *, int, const char *); -extern int vfs_follow_link(struct nameidata *, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); diff --git a/include/linux/fs_struct.h 
b/include/linux/fs_struct.h index 2b93a9a5a1e6..0efc3e62843a 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -39,17 +39,6 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) spin_unlock(&fs->lock); } -static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, - struct path *pwd) -{ - spin_lock(&fs->lock); - *root = fs->root; - path_get(root); - *pwd = fs->pwd; - path_get(pwd); - spin_unlock(&fs->lock); -} - extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ccfe17c5c8da..1e041063b226 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -7,11 +7,7 @@ #include <linux/vtime.h> -#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) extern void synchronize_irq(unsigned int irq); -#else -# define synchronize_irq(irq) barrier() -#endif #if defined(CONFIG_TINY_RCU) diff --git a/include/linux/hid.h b/include/linux/hid.h index ee1ffc5e19c9..31b9d299ef6c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -756,6 +756,10 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +struct hid_report *hid_validate_values(struct hid_device *hid, + unsigned int type, unsigned int id, + unsigned int field_index, + unsigned int report_counts); int hid_open_report(struct hid_device *device); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b60de92e2edc..3935428c57cf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -96,9 +96,6 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, 
pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end); -extern int handle_pte_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - pte_t *pte, pmd_t *pmd, unsigned int flags); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5fa5afeeb759..5e865b554940 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -120,7 +120,6 @@ struct irqaction { extern irqreturn_t no_action(int cpl, void *dev_id); -#ifdef CONFIG_GENERIC_HARDIRQS extern int __must_check request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, @@ -140,40 +139,6 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler, extern int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id); -#else - -extern int __must_check -request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, - const char *name, void *dev); - -/* - * Special function to avoid ifdeffery in kernel/irq/devres.c which - * gets magically built by GENERIC_HARDIRQS=n architectures (sparc, - * m68k). 
I really love these $@%#!* obvious Makefile references: - * ../../../kernel/irq/devres.o - */ -static inline int __must_check -request_threaded_irq(unsigned int irq, irq_handler_t handler, - irq_handler_t thread_fn, - unsigned long flags, const char *name, void *dev) -{ - return request_irq(irq, handler, flags, name, dev); -} - -static inline int __must_check -request_any_context_irq(unsigned int irq, irq_handler_t handler, - unsigned long flags, const char *name, void *dev_id) -{ - return request_irq(irq, handler, flags, name, dev_id); -} - -static inline int __must_check -request_percpu_irq(unsigned int irq, irq_handler_t handler, - const char *devname, void __percpu *percpu_dev_id) -{ - return request_irq(irq, handler, 0, devname, percpu_dev_id); -} -#endif extern void free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); @@ -221,7 +186,6 @@ extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); /* The following three functions are for the core kernel use only. */ -#ifdef CONFIG_GENERIC_HARDIRQS extern void suspend_device_irqs(void); extern void resume_device_irqs(void); #ifdef CONFIG_PM_SLEEP @@ -229,13 +193,8 @@ extern int check_wakeup_irqs(void); #else static inline int check_wakeup_irqs(void) { return 0; } #endif -#else -static inline void suspend_device_irqs(void) { }; -static inline void resume_device_irqs(void) { }; -static inline int check_wakeup_irqs(void) { return 0; } -#endif -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +#if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; @@ -287,9 +246,8 @@ static inline int irq_set_affinity_hint(unsigned int irq, { return -EINVAL; } -#endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ +#endif /* CONFIG_SMP */ -#ifdef CONFIG_GENERIC_HARDIRQS /* * Special lockdep variants of irq disabling/enabling. 
* These should be used for locking constructs that @@ -354,33 +312,6 @@ static inline int disable_irq_wake(unsigned int irq) return irq_set_irq_wake(irq, 0); } -#else /* !CONFIG_GENERIC_HARDIRQS */ -/* - * NOTE: non-genirq architectures, if they want to support the lock - * validator need to define the methods below in their asm/irq.h - * files, under an #ifdef CONFIG_LOCKDEP section. - */ -#ifndef CONFIG_LOCKDEP -# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) -# define disable_irq_nosync_lockdep_irqsave(irq, flags) \ - disable_irq_nosync(irq) -# define disable_irq_lockdep(irq) disable_irq(irq) -# define enable_irq_lockdep(irq) enable_irq(irq) -# define enable_irq_lockdep_irqrestore(irq, flags) \ - enable_irq(irq) -# endif - -static inline int enable_irq_wake(unsigned int irq) -{ - return 0; -} - -static inline int disable_irq_wake(unsigned int irq) -{ - return 0; -} -#endif /* CONFIG_GENERIC_HARDIRQS */ - #ifdef CONFIG_IRQ_FORCED_THREADING extern bool force_irqthreads; @@ -655,7 +586,7 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) * if more than one irq occurred. */ -#if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) +#if !defined(CONFIG_GENERIC_IRQ_PROBE) static inline unsigned long probe_irq_on(void) { return 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3aeb7305e2f5..7ea319e95b47 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -58,10 +58,26 @@ struct iommu_domain { #define IOMMU_CAP_CACHE_COHERENCY 0x1 #define IOMMU_CAP_INTR_REMAP 0x2 /* isolates device intrs */ +/* + * Following constraints are specifc to FSL_PAMUV1: + * -aperture must be power of 2, and naturally aligned + * -number of windows must be power of 2, and address space size + * of each window is determined by aperture size / # of windows + * -the actual size of the mapped region of a window must be power + * of 2 starting with 4KB and physical address must be naturally + * aligned. 
+ * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned constraints. + * The caller can invoke iommu_domain_get_attr to check if the underlying + * iommu implementation supports these constraints. + */ + enum iommu_attr { DOMAIN_ATTR_GEOMETRY, DOMAIN_ATTR_PAGING, DOMAIN_ATTR_WINDOWS, + DOMAIN_ATTR_FSL_PAMU_STASH, + DOMAIN_ATTR_FSL_PAMU_ENABLE, + DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_MAX, }; diff --git a/include/linux/irq.h b/include/linux/irq.h index f04d3ba335cb..56bb0dc8b7d4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -382,8 +382,6 @@ extern void irq_cpu_online(void); extern void irq_cpu_offline(void); extern int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask); -#ifdef CONFIG_GENERIC_HARDIRQS - #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); @@ -802,11 +800,4 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif -#else /* !CONFIG_GENERIC_HARDIRQS */ - -extern struct msi_desc *irq_get_msi_desc(unsigned int irq); -extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); - -#endif /* CONFIG_GENERIC_HARDIRQS */ - #endif /* _LINUX_IRQ_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 623325e2ff97..56fb646909dc 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -76,8 +76,6 @@ struct irq_desc { extern struct irq_desc irq_desc[NR_IRQS]; #endif -#ifdef CONFIG_GENERIC_HARDIRQS - static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; @@ -173,6 +171,5 @@ __irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler) desc->preflow_handler = handler; } #endif -#endif #endif diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 0a2dc46cdaf6..fdd5cc16c9c4 100644 --- a/include/linux/irqnr.h +++
b/include/linux/irqnr.h @@ -4,23 +4,6 @@ #include <uapi/linux/irqnr.h> -#ifndef CONFIG_GENERIC_HARDIRQS -#include <asm/irq.h> - -/* - * Wrappers for non-genirq architectures: - */ -#define nr_irqs NR_IRQS -#define irq_to_desc(irq) (&irq_desc[irq]) - -# define for_each_irq_desc(irq, desc) \ - for (irq = 0; irq < nr_irqs; irq++) - -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1; irq >= 0; irq--) - -#else /* CONFIG_GENERIC_HARDIRQS */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); @@ -50,8 +33,6 @@ unsigned int irq_get_next_irq(unsigned int offset); for (irq = irq_get_next_irq(0); irq < nr_irqs; \ irq = irq_get_next_irq(irq + 1)) -#endif /* CONFIG_GENERIC_HARDIRQS */ - #define for_each_irq_nr(irq) \ for (irq = 0; irq < nr_irqs; irq++) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ed5f6ed6eb77..51c72be4a7c3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -36,9 +36,6 @@ struct kernel_cpustat { }; struct kernel_stat { -#ifndef CONFIG_GENERIC_HARDIRQS - unsigned int irqs[NR_IRQS]; -#endif unsigned long irqs_sum; unsigned int softirqs[NR_SOFTIRQS]; }; @@ -54,22 +51,6 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); extern unsigned long long nr_context_switches(void); -#ifndef CONFIG_GENERIC_HARDIRQS - -struct irq_desc; - -static inline void kstat_incr_irqs_this_cpu(unsigned int irq, - struct irq_desc *desc) -{ - __this_cpu_inc(kstat.irqs[irq]); - __this_cpu_inc(kstat.irqs_sum); -} - -static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) -{ - return kstat_cpu(cpu).irqs[irq]; -} -#else #include <linux/irq.h> extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); @@ -79,8 +60,6 @@ do { \ __this_cpu_inc(kstat.irqs_sum); \ } while (0) -#endif - static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) { __this_cpu_inc(kstat.softirqs[irq]); @@ -94,20 +73,7 @@ static inline 
unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ -#ifndef CONFIG_GENERIC_HARDIRQS -static inline unsigned int kstat_irqs(unsigned int irq) -{ - unsigned int sum = 0; - int cpu; - - for_each_possible_cpu(cpu) - sum += kstat_irqs_cpu(irq, cpu); - - return sum; -} -#else extern unsigned int kstat_irqs(unsigned int irq); -#endif /* * Number of interrupts per cpu, since bootup diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca645a01d37a..0fbbc7aa02cb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h new file mode 100644 index 000000000000..3ce541753c88 --- /dev/null +++ b/include/linux/list_lru.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. + * Authors: David Chinner and Glauber Costa + * + * Generic LRU infrastructure + */ +#ifndef _LRU_LIST_H +#define _LRU_LIST_H + +#include <linux/list.h> +#include <linux/nodemask.h> + +/* list_lru_walk_cb has to always return one of those */ +enum lru_status { + LRU_REMOVED, /* item removed from list */ + LRU_ROTATE, /* item referenced, give another pass */ + LRU_SKIP, /* item cannot be locked, skip */ + LRU_RETRY, /* item not freeable. May drop the lock + internally, but has to return locked. 
*/ +}; + +struct list_lru_node { + spinlock_t lock; + struct list_head list; + /* kept as signed so we can catch imbalance bugs */ + long nr_items; +} ____cacheline_aligned_in_smp; + +struct list_lru { + struct list_lru_node *node; + nodemask_t active_nodes; +}; + +void list_lru_destroy(struct list_lru *lru); +int list_lru_init(struct list_lru *lru); + +/** + * list_lru_add: add an element to the lru list's tail + * @list_lru: the lru pointer + * @item: the item to be added. + * + * If the element is already part of a list, this function returns doing + * nothing. Therefore the caller does not need to keep state about whether or + * not the element already belongs in the list and is allowed to lazy update + * it. Note however that this is valid for *a* list, not *this* list. If + * the caller organize itself in a way that elements can be in more than + * one type of list, it is up to the caller to fully remove the item from + * the previous list (with list_lru_del() for instance) before moving it + * to @list_lru + * + * Return value: true if the list was updated, false otherwise + */ +bool list_lru_add(struct list_lru *lru, struct list_head *item); + +/** + * list_lru_del: delete an element to the lru list + * @list_lru: the lru pointer + * @item: the item to be deleted. + * + * This function works analogously as list_lru_add in terms of list + * manipulation. The comments about an element already pertaining to + * a list are also valid for list_lru_del. + * + * Return value: true if the list was updated, false otherwise + */ +bool list_lru_del(struct list_lru *lru, struct list_head *item); + +/** + * list_lru_count_node: return the number of objects currently held by @lru + * @lru: the lru pointer. + * @nid: the node id to count from. + * + * Always return a non-negative number, 0 for empty lists. There is no + * guarantee that the list is not updated while the count is being computed. + * Callers that want such a guarantee need to provide an outer lock. 
+ */ +unsigned long list_lru_count_node(struct list_lru *lru, int nid); +static inline unsigned long list_lru_count(struct list_lru *lru) +{ + long count = 0; + int nid; + + for_each_node_mask(nid, lru->active_nodes) + count += list_lru_count_node(lru, nid); + + return count; +} + +typedef enum lru_status +(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg); +/** + * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items. + * @lru: the lru pointer. + * @nid: the node id to scan from. + * @isolate: callback function that is responsible for deciding what to do with + * the item currently being scanned + * @cb_arg: opaque type that will be passed to @isolate + * @nr_to_walk: how many items to scan. + * + * This function will scan all elements in a particular list_lru, calling the + * @isolate callback for each of those items, along with the current list + * spinlock and a caller-provided opaque. The @isolate callback can choose to + * drop the lock internally, but *must* return with the lock held. The callback + * will return an enum lru_status telling the list_lru infrastructure what to + * do with the object being scanned. + * + * Please note that nr_to_walk does not mean how many objects will be freed, + * just how many objects will be scanned. + * + * Return value: the number of objects effectively removed from the LRU.
+ */ +unsigned long list_lru_walk_node(struct list_lru *lru, int nid, + list_lru_walk_cb isolate, void *cb_arg, + unsigned long *nr_to_walk); + +static inline unsigned long +list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, + void *cb_arg, unsigned long nr_to_walk) +{ + long isolated = 0; + int nid; + + for_each_node_mask(nid, lru->active_nodes) { + isolated += list_lru_walk_node(lru, nid, isolate, + cb_arg, &nr_to_walk); + if (nr_to_walk <= 0) + break; + } + return isolated; +} +#endif /* _LRU_LIST_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c416092e324..60e95872da29 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -30,9 +30,21 @@ struct page; struct mm_struct; struct kmem_cache; -/* Stats that can be updated by kernel. */ -enum mem_cgroup_page_stat_item { - MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ +/* + * The corresponding mem_cgroup_stat_names is defined in mm/memcontrol.c, + * These two lists should keep in accord with each other. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ + MEM_CGROUP_STAT_NSTATS, }; struct mem_cgroup_reclaim_cookie { @@ -41,6 +53,23 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +enum mem_cgroup_filter_t { + VISIT, /* visit current node */ + SKIP, /* skip the current node and continue traversal */ + SKIP_TREE, /* skip the whole subtree and continue traversal */ +}; + +/* + * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to + * iterate through the hierarchy tree. 
Each tree element is checked by the + * predicate before it is returned by the iterator. If a filter returns + * SKIP or SKIP_TREE then the iterator code continues traversal (with the + * next node down the hierarchy or the next node that doesn't belong under the + * memcg's subtree). + */ +typedef enum mem_cgroup_filter_t +(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root); + #ifdef CONFIG_MEMCG /* * All "charge" functions with gfp_mask should use GFP_KERNEL or @@ -108,9 +137,18 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage, extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok); -struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, - struct mem_cgroup *, - struct mem_cgroup_reclaim_cookie *); +struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, + struct mem_cgroup *prev, + struct mem_cgroup_reclaim_cookie *reclaim, + mem_cgroup_iter_filter cond); + +static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, + struct mem_cgroup *prev, + struct mem_cgroup_reclaim_cookie *reclaim) +{ + return mem_cgroup_iter_cond(root, prev, reclaim, NULL); +} + void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); /* @@ -125,6 +163,48 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); +/** + * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task + * @new: true to enable, false to disable + * + * Toggle whether a failed memcg charge should invoke the OOM killer + * or just return -ENOMEM. Returns the previous toggle state. + * + * NOTE: Any path that enables the OOM killer before charging must + * call mem_cgroup_oom_synchronize() afterward to finalize the + * OOM handling and clean up. 
+ */ +static inline bool mem_cgroup_toggle_oom(bool new) +{ + bool old; + + old = current->memcg_oom.may_oom; + current->memcg_oom.may_oom = new; + + return old; +} + +static inline void mem_cgroup_enable_oom(void) +{ + bool old = mem_cgroup_toggle_oom(true); + + WARN_ON(old == true); +} + +static inline void mem_cgroup_disable_oom(void) +{ + bool old = mem_cgroup_toggle_oom(false); + + WARN_ON(old == false); +} + +static inline bool task_in_memcg_oom(struct task_struct *p) +{ + return p->memcg_oom.in_memcg_oom; +} + +bool mem_cgroup_oom_synchronize(void); + #ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; #endif @@ -165,24 +245,24 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page, } void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx, + enum mem_cgroup_stat_index idx, int val); static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx) + enum mem_cgroup_stat_index idx) { mem_cgroup_update_page_stat(page, idx, 1); } static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx) + enum mem_cgroup_stat_index idx) { mem_cgroup_update_page_stat(page, idx, -1); } -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); +enum mem_cgroup_filter_t +mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, + struct mem_cgroup *root); void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, @@ -296,6 +376,15 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, struct page *oldpage, struct page *newpage, bool migration_ok) { } +static inline struct mem_cgroup * +mem_cgroup_iter_cond(struct mem_cgroup *root, + struct mem_cgroup *prev, + struct mem_cgroup_reclaim_cookie *reclaim, + mem_cgroup_iter_filter cond) +{ + /* first call must return non-NULL, second 
return NULL */ + return (struct mem_cgroup *)(unsigned long)!prev; +} static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, @@ -348,22 +437,45 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page, { } +static inline bool mem_cgroup_toggle_oom(bool new) +{ + return false; +} + +static inline void mem_cgroup_enable_oom(void) +{ +} + +static inline void mem_cgroup_disable_oom(void) +{ +} + +static inline bool task_in_memcg_oom(struct task_struct *p) +{ + return false; +} + +static inline bool mem_cgroup_oom_synchronize(void) +{ + return false; +} + static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx) + enum mem_cgroup_stat_index idx) { } static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx) + enum mem_cgroup_stat_index idx) { } static inline -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) +enum mem_cgroup_filter_t +mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, + struct mem_cgroup *root) { - return 0; + return VISIT; } static inline void mem_cgroup_split_huge_fixup(struct page *head) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6fe521420631..8d3c57fdf221 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -53,6 +53,9 @@ extern int migrate_vmas(struct mm_struct *mm, extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); +extern int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, + struct buffer_head *head, enum migrate_mode mode); #else static inline void putback_lru_pages(struct list_head *l) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index caf543c7eaa7..8b6e55ee8855 100644 --- a/include/linux/mm.h +++ 
b/include/linux/mm.h @@ -176,6 +176,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ #define FAULT_FLAG_TRIED 0x40 /* second try */ +#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's @@ -876,11 +877,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ +#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ - VM_FAULT_HWPOISON_LARGE) + VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE) /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) @@ -984,7 +986,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index faf4b7c1ad12..d9851eeb6e1d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -322,6 +322,7 @@ struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; }; +struct kioctx_table; struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs 
*/ struct rb_root mm_rb; @@ -383,8 +384,8 @@ struct mm_struct { struct core_state *core_state; /* coredumping support */ #ifdef CONFIG_AIO - spinlock_t ioctx_lock; - struct hlist_head ioctx_list; + spinlock_t ioctx_lock; + struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MM_OWNER /* diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bc95b2b391bf..97fbecdd7a40 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -758,6 +758,7 @@ #define PCI_DEVICE_ID_HP_CISSE 0x323a #define PCI_DEVICE_ID_HP_CISSF 0x323b #define PCI_DEVICE_ID_HP_CISSH 0x323c +#define PCI_DEVICE_ID_HP_CISSI 0x3239 #define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031 #define PCI_VENDOR_ID_PCTECH 0x1042 diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h new file mode 100644 index 000000000000..0b23edbee309 --- /dev/null +++ b/include/linux/percpu_ida.h @@ -0,0 +1,60 @@ +#ifndef __PERCPU_IDA_H__ +#define __PERCPU_IDA_H__ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/spinlock_types.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +struct percpu_ida_cpu; + +struct percpu_ida { + /* + * number of tags available to be allocated, as passed to + * percpu_ida_init() + */ + unsigned nr_tags; + + struct percpu_ida_cpu __percpu *tag_cpu; + + /* + * Bitmap of cpus that (may) have tags on their percpu freelists: + * steal_tags() uses this to decide when to steal tags, and which cpus + * to try stealing from. + * + * It's ok for a freelist to be empty when its bit is set - steal_tags() + * will just keep looking - but the bitmap _must_ be set whenever a + * percpu freelist does have tags. + */ + cpumask_t cpus_have_tags; + + struct { + spinlock_t lock; + /* + * When we go to steal tags from another cpu (see steal_tags()), + * we want to pick a cpu at random. 
Cycling through them every + * time we steal is a bit easier and more or less equivalent: + */ + unsigned cpu_last_stolen; + + /* For sleeping on allocation failure */ + wait_queue_head_t wait; + + /* + * Global freelist - it's a stack where nr_free points to the + * top + */ + unsigned nr_free; + unsigned *freelist; + } ____cacheline_aligned_in_smp; +}; + +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); +void percpu_ida_free(struct percpu_ida *pool, unsigned tag); + +void percpu_ida_destroy(struct percpu_ida *pool); +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); + +#endif /* __PERCPU_IDA_H__ */ diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h deleted file mode 100644 index da7e6274b175..000000000000 --- a/include/linux/platform_data/exynos_thermal.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * exynos_thermal.h - Samsung EXYNOS TMU (Thermal Management Unit) - * - * Copyright (C) 2011 Samsung Electronics - * Donggeun Kim <dg77.kim@samsung.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _LINUX_EXYNOS_THERMAL_H -#define _LINUX_EXYNOS_THERMAL_H -#include <linux/cpu_cooling.h> - -enum calibration_type { - TYPE_ONE_POINT_TRIMMING, - TYPE_TWO_POINT_TRIMMING, - TYPE_NONE, -}; - -enum soc_type { - SOC_ARCH_EXYNOS4210 = 1, - SOC_ARCH_EXYNOS, -}; -/** - * struct freq_clip_table - * @freq_clip_max: maximum frequency allowed for this cooling state. - * @temp_level: Temperature level at which the temperature clipping will - * happen. - * @mask_val: cpumask of the allowed cpu's where the clipping will take place. - * - * This structure is required to be filled and passed to the - * cpufreq_cooling_unregister function. - */ -struct freq_clip_table { - unsigned int freq_clip_max; - unsigned int temp_level; - const struct cpumask *mask_val; -}; - -/** - * struct exynos_tmu_platform_data - * @threshold: basic temperature for generating interrupt - * 25 <= threshold <= 125 [unit: degree Celsius] - * @threshold_falling: differntial value for setting threshold - * of temperature falling interrupt. 
- * @trigger_levels: array for each interrupt levels - * [unit: degree Celsius] - * 0: temperature for trigger_level0 interrupt - * condition for trigger_level0 interrupt: - * current temperature > threshold + trigger_levels[0] - * 1: temperature for trigger_level1 interrupt - * condition for trigger_level1 interrupt: - * current temperature > threshold + trigger_levels[1] - * 2: temperature for trigger_level2 interrupt - * condition for trigger_level2 interrupt: - * current temperature > threshold + trigger_levels[2] - * 3: temperature for trigger_level3 interrupt - * condition for trigger_level3 interrupt: - * current temperature > threshold + trigger_levels[3] - * @trigger_level0_en: - * 1 = enable trigger_level0 interrupt, - * 0 = disable trigger_level0 interrupt - * @trigger_level1_en: - * 1 = enable trigger_level1 interrupt, - * 0 = disable trigger_level1 interrupt - * @trigger_level2_en: - * 1 = enable trigger_level2 interrupt, - * 0 = disable trigger_level2 interrupt - * @trigger_level3_en: - * 1 = enable trigger_level3 interrupt, - * 0 = disable trigger_level3 interrupt - * @gain: gain of amplifier in the positive-TC generator block - * 0 <= gain <= 15 - * @reference_voltage: reference voltage of amplifier - * in the positive-TC generator block - * 0 <= reference_voltage <= 31 - * @noise_cancel_mode: noise cancellation mode - * 000, 100, 101, 110 and 111 can be different modes - * @type: determines the type of SOC - * @efuse_value: platform defined fuse value - * @cal_type: calibration type for temperature - * @freq_clip_table: Table representing frequency reduction percentage. - * @freq_tab_count: Count of the above table as frequency reduction may - * applicable to only some of the trigger levels. - * - * This structure is required for configuration of exynos_tmu driver. 
- */ -struct exynos_tmu_platform_data { - u8 threshold; - u8 threshold_falling; - u8 trigger_levels[4]; - bool trigger_level0_en; - bool trigger_level1_en; - bool trigger_level2_en; - bool trigger_level3_en; - - u8 gain; - u8 reference_voltage; - u8 noise_cancel_mode; - u32 efuse_value; - - enum calibration_type cal_type; - enum soc_type type; - struct freq_clip_table freq_tab[4]; - unsigned int freq_tab_count; -}; -#endif /* _LINUX_EXYNOS_THERMAL_H */ diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 202e290faea8..51a2ff579d60 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -36,6 +36,13 @@ struct lp55xx_predef_pattern { u8 size_b; }; +enum lp8501_pwr_sel { + LP8501_ALL_VDD, /* D1~9 are connected to VDD */ + LP8501_6VDD_3VOUT, /* D1~6 with VDD, D7~9 with VOUT */ + LP8501_3VDD_6VOUT, /* D1~6 with VOUT, D7~9 with VDD */ + LP8501_ALL_VOUT, /* D1~9 are connected to VOUT */ +}; + /* * struct lp55xx_platform_data * @led_config : Configurable led class device @@ -67,6 +74,9 @@ struct lp55xx_platform_data { /* Predefined pattern data */ struct lp55xx_predef_pattern *patterns; unsigned int num_patterns; + + /* LP8501 specific */ + enum lp8501_pwr_sel pwr_sel; }; #endif /* _LEDS_LP55XX_H */ diff --git a/include/linux/platform_data/leds-pca9633.h b/include/linux/platform_data/leds-pca963x.h index c5bf29b6fa7f..e731f0036329 100644 --- a/include/linux/platform_data/leds-pca9633.h +++ b/include/linux/platform_data/leds-pca963x.h @@ -1,7 +1,8 @@ /* - * PCA9633 LED chip driver. + * PCA963X LED chip driver. 
* * Copyright 2012 bct electronic GmbH + * Copyright 2013 Qtechnology A/S * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,18 +19,24 @@ * 02110-1301 USA */ -#ifndef __LINUX_PCA9633_H -#define __LINUX_PCA9633_H +#ifndef __LINUX_PCA963X_H +#define __LINUX_PCA963X_H #include <linux/leds.h> -enum pca9633_outdrv { - PCA9633_OPEN_DRAIN, - PCA9633_TOTEM_POLE, /* aka push-pull */ +enum pca963x_outdrv { + PCA963X_OPEN_DRAIN, + PCA963X_TOTEM_POLE, /* aka push-pull */ }; -struct pca9633_platform_data { +enum pca963x_blink_type { + PCA963X_SW_BLINK, + PCA963X_HW_BLINK, +}; + +struct pca963x_platform_data { struct led_platform_data leds; - enum pca9633_outdrv outdrv; + enum pca963x_outdrv outdrv; + enum pca963x_blink_type blink_type; }; -#endif /* __LINUX_PCA9633_H*/ +#endif /* __LINUX_PCA963X_H*/ diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 96a509b6be04..201a69749659 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -54,7 +54,7 @@ struct res_counter { struct res_counter *parent; }; -#define RESOURCE_MAX (unsigned long long)LLONG_MAX +#define RES_COUNTER_MAX ULLONG_MAX /** * Helpers to interact with userspace diff --git a/include/linux/sched.h b/include/linux/sched.h index 45f254dddafc..6682da36b293 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1393,6 +1393,13 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; unsigned int memcg_kmem_skip_account; + struct memcg_oom_info { + unsigned int may_oom:1; + unsigned int in_memcg_oom:1; + unsigned int oom_locked:1; + int wakeups; + struct mem_cgroup *wait_on_memcg; + } memcg_oom; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 18299057402f..21a209336e79 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -3,15 +3,21 @@ /* * 
Reader/writer consistent mechanism without starving writers. This type of * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. Readers never - * block but they may have to retry if a writer is in - * progress. Writers do not wait for readers. + * and is willing to retry if the information changes. There are two types + * of readers: + * 1. Sequence readers which never block a writer but they may have to retry + * if a writer is in progress by detecting change in sequence number. + * Writers do not wait for a sequence reader. + * 2. Locking readers which will wait if a writer or another locking reader + * is in progress. A locking reader in progress will also block a writer + * from going forward. Unlike the regular rwlock, the read lock here is + * exclusive so that only one locking reader can get it. * - * This is not as cache friendly as brlock. Also, this will not work + * This is not as cache friendly as brlock. Also, this may not work well * for data that contains pointers, because any writer could * invalidate a pointer that a reader was following. * - * Expected reader usage: + * Expected non-blocking reader usage: * do { * seq = read_seqbegin(&foo); * ... @@ -268,4 +274,56 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } +/* + * A locking reader exclusively locks out other writers and locking readers, + * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. 
+ */ +static inline void read_seqlock_excl(seqlock_t *sl) +{ + spin_lock(&sl->lock); +} + +static inline void read_sequnlock_excl(seqlock_t *sl) +{ + spin_unlock(&sl->lock); +} + +static inline void read_seqlock_excl_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); +} + +static inline void read_sequnlock_excl_bh(seqlock_t *sl) +{ + spin_unlock_bh(&sl->lock); +} + +static inline void read_seqlock_excl_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); +} + +static inline void read_sequnlock_excl_irq(seqlock_t *sl) +{ + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + return flags; +} + +#define read_seqlock_excl_irqsave(lock, flags) \ + do { flags = __read_seqlock_excl_irqsave(lock); } while (0) + +static inline void +read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) +{ + spin_unlock_irqrestore(&sl->lock, flags); +} + #endif /* __LINUX_SEQLOCK_H */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ac6b8ee07825..68c097077ef0 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,39 +4,67 @@ /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extention later. + * + * The 'gfpmask' refers to the allocation we are currently trying to + * fulfil. */ struct shrink_control { gfp_t gfp_mask; - /* How many slab objects shrinker() should scan and try to reclaim */ + /* + * How many objects scan_objects should scan and try to reclaim. + * This is reset before every call, so it is safe for callees + * to modify. + */ unsigned long nr_to_scan; + + /* shrink from these nodes */ + nodemask_t nodes_to_scan; + /* current node being shrunk (for NUMA aware shrinkers) */ + int nid; }; +#define SHRINK_STOP (~0UL) /* * A callback you can register to apply pressure to ageable caches. 
* - * 'sc' is passed shrink_control which includes a count 'nr_to_scan' - * and a 'gfpmask'. It should look through the least-recently-used - * 'nr_to_scan' entries and attempt to free them up. It should return - * the number of objects which remain in the cache. If it returns -1, it means - * it cannot do any scanning at this time (eg. there is a risk of deadlock). + * @count_objects should return the number of freeable items in the cache. If + * there are no objects to free or the number of freeable items cannot be + * determined, it should return 0. No deadlock checks should be done during the + * count callback - the shrinker relies on aggregating scan counts that couldn't + * be executed due to potential deadlocks to be run at a later call when the + * deadlock condition is no longer pending. * - * The 'gfpmask' refers to the allocation we are currently trying to - * fulfil. + * @scan_objects will only be called if @count_objects returned a non-zero + * value for the number of freeable objects. The callout should scan the cache + * and attempt to free items from the cache. It should then return the number + * of objects freed during the scan, or SHRINK_STOP if progress cannot be made + * due to potential deadlocks. If SHRINK_STOP is returned, then no further + * attempts to call the @scan_objects will be made from the current reclaim + * context. * - * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is - * querying the cache size, so a fastpath for that case is appropriate. 
+ * @flags determine the shrinker abilities, like numa awareness */ struct shrinker { - int (*shrink)(struct shrinker *, struct shrink_control *sc); + unsigned long (*count_objects)(struct shrinker *, + struct shrink_control *sc); + unsigned long (*scan_objects)(struct shrinker *, + struct shrink_control *sc); + int seeks; /* seeks to recreate an obj */ long batch; /* reclaim batch size, 0 = default */ + unsigned long flags; /* These are for internal use */ struct list_head list; - atomic_long_t nr_in_batch; /* objs pending delete */ + /* objs pending delete, per node */ + atomic_long_t *nr_deferred; }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ -extern void register_shrinker(struct shrinker *); + +/* Flags */ +#define SHRINKER_NUMA_AWARE (1 << 0) + +extern int register_shrinker(struct shrinker *); extern void unregister_shrinker(struct shrinker *); #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index 6c5cc0ea8713..74f105847d13 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -4,6 +4,8 @@ * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators. 
+ * (C) Linux Foundation 2008-2013 + * Unified interface for all slab allocators */ #ifndef _LINUX_SLAB_H @@ -94,6 +96,7 @@ #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) +#include <linux/kmemleak.h> struct mem_cgroup; /* @@ -289,6 +292,57 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ +void *__kmalloc(size_t size, gfp_t flags); +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); + +#ifdef CONFIG_NUMA +void *__kmalloc_node(size_t size, gfp_t flags, int node); +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +#else +static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __kmalloc(size, flags); +} + +static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) +{ + return kmem_cache_alloc(s, flags); +} +#endif + +#ifdef CONFIG_TRACING +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); + +#ifdef CONFIG_NUMA +extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, + int node, size_t size); +#else +static __always_inline void * +kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, + int node, size_t size) +{ + return kmem_cache_alloc_trace(s, gfpflags, size); +} +#endif /* CONFIG_NUMA */ + +#else /* CONFIG_TRACING */ +static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, + gfp_t flags, size_t size) +{ + return kmem_cache_alloc(s, flags); +} + +static __always_inline void * +kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, + int node, size_t size) +{ + return kmem_cache_alloc_node(s, gfpflags, node); +} +#endif /* CONFIG_TRACING */ + #ifdef CONFIG_SLAB #include <linux/slab_def.h> #endif @@ -297,9 +351,60 @@ static __always_inline int kmalloc_index(size_t size) #include <linux/slub_def.h> #endif -#ifdef CONFIG_SLOB -#include <linux/slob_def.h> +static __always_inline void * +kmalloc_order(size_t 
size, gfp_t flags, unsigned int order) +{ + void *ret; + + flags |= (__GFP_COMP | __GFP_KMEMCG); + ret = (void *) __get_free_pages(flags, order); + kmemleak_alloc(ret, size, 1, flags); + return ret; +} + +#ifdef CONFIG_TRACING +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +#else +static __always_inline void * +kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +{ + return kmalloc_order(size, flags, order); +} +#endif + +static __always_inline void *kmalloc_large(size_t size, gfp_t flags) +{ + unsigned int order = get_order(size); + return kmalloc_order_trace(size, flags, order); +} + +/** + * kmalloc - allocate memory + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate (see kcalloc). + * + * kmalloc is the normal method of allocating memory + * for objects smaller than page size in the kernel. + */ +static __always_inline void *kmalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size)) { + if (size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large(size, flags); +#ifndef CONFIG_SLOB + if (!(flags & GFP_DMA)) { + int index = kmalloc_index(size); + + if (!index) + return ZERO_SIZE_PTR; + + return kmem_cache_alloc_trace(kmalloc_caches[index], + flags, size); + } #endif + } + return __kmalloc(size, flags); +} /* * Determine size used for the nth kmalloc cache. @@ -321,6 +426,23 @@ static __always_inline int kmalloc_size(int n) return 0; } +static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ +#ifndef CONFIG_SLOB + if (__builtin_constant_p(size) && + size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { + int i = kmalloc_index(size); + + if (!i) + return ZERO_SIZE_PTR; + + return kmem_cache_alloc_node_trace(kmalloc_caches[i], + flags, node, size); + } +#endif + return __kmalloc_node(size, flags, node); +} + /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. 
* Intended for arches that get misalignment faults even for 64 bit integer @@ -451,36 +573,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) return kmalloc_array(n, size, flags | __GFP_ZERO); } -#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB) -/** - * kmalloc_node - allocate memory from a specific node - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kmalloc). - * @node: node to allocate from. - * - * kmalloc() for non-local nodes, used to allocate from a specific node - * if available. Equivalent to kmalloc() in the non-NUMA single-node - * case. - */ -static inline void *kmalloc_node(size_t size, gfp_t flags, int node) -{ - return kmalloc(size, flags); -} - -static inline void *__kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __kmalloc(size, flags); -} - -void *kmem_cache_alloc(struct kmem_cache *, gfp_t); - -static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, - gfp_t flags, int node) -{ - return kmem_cache_alloc(cachep, flags); -} -#endif /* !CONFIG_NUMA && !CONFIG_SLOB */ - /* * kmalloc_track_caller is a special version of kmalloc that records the * calling function of the routine calling it for slab leak tracking instead diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cd401580bdd3..e9346b4f1ef4 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -3,20 +3,6 @@ /* * Definitions unique to the original Linux SLAB allocator. - * - * What we provide here is a way to optimize the frequent kmalloc - * calls in the kernel by selecting the appropriate general cache - * if kmalloc was called with a size that can be established at - * compile time. - */ - -#include <linux/init.h> -#include <linux/compiler.h> - -/* - * struct kmem_cache - * - * manages a cache. 
*/ struct kmem_cache { @@ -102,96 +88,4 @@ struct kmem_cache { */ }; -void *kmem_cache_alloc(struct kmem_cache *, gfp_t); -void *__kmalloc(size_t size, gfp_t flags); - -#ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); -#else -static __always_inline void * -kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) -{ - return kmem_cache_alloc(cachep, flags); -} -#endif - -static __always_inline void *kmalloc(size_t size, gfp_t flags) -{ - struct kmem_cache *cachep; - void *ret; - - if (__builtin_constant_p(size)) { - int i; - - if (!size) - return ZERO_SIZE_PTR; - - if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE)) - return NULL; - - i = kmalloc_index(size); - -#ifdef CONFIG_ZONE_DMA - if (flags & GFP_DMA) - cachep = kmalloc_dma_caches[i]; - else -#endif - cachep = kmalloc_caches[i]; - - ret = kmem_cache_alloc_trace(cachep, flags, size); - - return ret; - } - return __kmalloc(size, flags); -} - -#ifdef CONFIG_NUMA -extern void *__kmalloc_node(size_t size, gfp_t flags, int node); -extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); - -#ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep, - gfp_t flags, - int nodeid, - size_t size); -#else -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *cachep, - gfp_t flags, - int nodeid, - size_t size) -{ - return kmem_cache_alloc_node(cachep, flags, nodeid); -} -#endif - -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) -{ - struct kmem_cache *cachep; - - if (__builtin_constant_p(size)) { - int i; - - if (!size) - return ZERO_SIZE_PTR; - - if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE)) - return NULL; - - i = kmalloc_index(size); - -#ifdef CONFIG_ZONE_DMA - if (flags & GFP_DMA) - cachep = kmalloc_dma_caches[i]; - else -#endif - cachep = kmalloc_caches[i]; - - return kmem_cache_alloc_node_trace(cachep, flags, node, size); - } - return __kmalloc_node(size, 
flags, node); -} - -#endif /* CONFIG_NUMA */ - #endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h deleted file mode 100644 index 095a5a4a8516..000000000000 --- a/include/linux/slob_def.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef __LINUX_SLOB_DEF_H -#define __LINUX_SLOB_DEF_H - -#include <linux/numa.h> - -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); - -static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, - gfp_t flags) -{ - return kmem_cache_alloc_node(cachep, flags, NUMA_NO_NODE); -} - -void *__kmalloc_node(size_t size, gfp_t flags, int node); - -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) -{ - return __kmalloc_node(size, flags, node); -} - -static __always_inline void *kmalloc(size_t size, gfp_t flags) -{ - return __kmalloc_node(size, flags, NUMA_NO_NODE); -} - -static __always_inline void *__kmalloc(size_t size, gfp_t flags) -{ - return kmalloc(size, flags); -} - -#endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 027276fa8713..cc0b67eada42 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -6,14 +6,8 @@ * * (C) 2007 SGI, Christoph Lameter */ -#include <linux/types.h> -#include <linux/gfp.h> -#include <linux/bug.h> -#include <linux/workqueue.h> #include <linux/kobject.h> -#include <linux/kmemleak.h> - enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ @@ -104,108 +98,4 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -void *kmem_cache_alloc(struct kmem_cache *, gfp_t); -void *__kmalloc(size_t size, gfp_t flags); - -static __always_inline void * -kmalloc_order(size_t size, gfp_t flags, unsigned int order) -{ - void *ret; - - flags |= (__GFP_COMP | __GFP_KMEMCG); - ret = (void *) __get_free_pages(flags, order); - kmemleak_alloc(ret, size, 1, flags); - return ret; -} - -/** - * 
Calling this on allocated memory will check that the memory - * is expected to be in use, and print warnings if not. - */ -#ifdef CONFIG_SLUB_DEBUG -extern bool verify_mem_not_deleted(const void *x); -#else -static inline bool verify_mem_not_deleted(const void *x) -{ - return true; -} -#endif - -#ifdef CONFIG_TRACING -extern void * -kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size); -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); -#else -static __always_inline void * -kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) -{ - return kmem_cache_alloc(s, gfpflags); -} - -static __always_inline void * -kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) -{ - return kmalloc_order(size, flags, order); -} -#endif - -static __always_inline void *kmalloc_large(size_t size, gfp_t flags) -{ - unsigned int order = get_order(size); - return kmalloc_order_trace(size, flags, order); -} - -static __always_inline void *kmalloc(size_t size, gfp_t flags) -{ - if (__builtin_constant_p(size)) { - if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large(size, flags); - - if (!(flags & GFP_DMA)) { - int index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; - - return kmem_cache_alloc_trace(kmalloc_caches[index], - flags, size); - } - } - return __kmalloc(size, flags); -} - -#ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); - -#ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size); -#else -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) -{ - return kmem_cache_alloc_node(s, gfpflags, node); -} -#endif - -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) -{ - if (__builtin_constant_p(size) && - size <= 
KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { - int index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; - - return kmem_cache_alloc_node_trace(kmalloc_caches[index], - flags, node, size); - } - return __kmalloc_node(size, flags, node); -} -#endif - #endif /* _LINUX_SLUB_DEF_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index c03c139219c9..46ba0c6c219f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -280,7 +280,7 @@ extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); -extern int lru_add_drain_all(void); +extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_page(struct page *page); extern void swap_setup(void); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a386a1cbb6e1..b268d3cf7ae3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -207,6 +207,16 @@ struct thermal_bind_params { * See Documentation/thermal/sysfs-api.txt for more information. */ int trip_mask; + + /* + * This is an array of cooling state limits. Must have exactly + * 2 * thermal_zone.number_of_trip_points. It is an array consisting + * of tuples <lower-state upper-state> of state limits. Each trip + * will be associated with one state limit tuple when binding. + * A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> + * on all trips. + */ + unsigned long *binding_limits; int (*match) (struct thermal_zone_device *tz, struct thermal_cooling_device *cdev); }; @@ -214,6 +224,14 @@ struct thermal_bind_params { /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { char governor_name[THERMAL_NAME_LENGTH]; + + /* + * a boolean to indicate if the thermal to hwmon sysfs interface + * is required. when no_hwmon == false, a hwmon sysfs interface + * will be created. 
when no_hwmon == true, nothing will be done + */ + bool no_hwmon; + int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; }; diff --git a/include/linux/time-armada-370-xp.h b/include/linux/time-armada-370-xp.h deleted file mode 100644 index 6fb0856b9405..000000000000 --- a/include/linux/time-armada-370-xp.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Marvell Armada 370/XP SoC timer handling. - * - * Copyright (C) 2012 Marvell - * - * Lior Amsalem <alior@marvell.com> - * Gregory CLEMENT <gregory.clement@free-electrons.com> - * Thomas Petazzoni <thomas.petazzoni@free-electrons.com> - * - */ -#ifndef __TIME_ARMADA_370_XPPRCMU_H -#define __TIME_ARMADA_370_XPPRCMU_H - -void armada_370_xp_timer_init(void); - -#endif diff --git a/include/linux/timex.h b/include/linux/timex.h index b3726e61368e..dd3edd7dfc94 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -141,6 +141,7 @@ extern int do_adjtimex(struct timex *); extern void hardpps(const struct timespec *, const struct timespec *); int read_current_timer(unsigned long *timer_val); +void ntp_notify_cmos_timer(void); /* The clock frequency of the i8253/i8254 PIT */ #define PIT_TICK_RATE 1193182ul diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index d477bfb73fb9..66d42edfb3fc 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -144,6 +144,7 @@ enum scsi_timeouts { #define ACCESS_CONTROL_IN 0x86 #define ACCESS_CONTROL_OUT 0x87 #define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 #define WRITE_16 0x8a #define READ_ATTRIBUTE 0x8c #define WRITE_ATTRIBUTE 0x8d diff --git a/include/sound/rcar_snd.h b/include/sound/rcar_snd.h index d35412ae03b3..fe66533e9b7a 100644 --- a/include/sound/rcar_snd.h +++ b/include/sound/rcar_snd.h @@ -55,7 +55,7 @@ struct rsnd_ssi_platform_info { /* * flags */ -#define RSND_SCU_USB_HPBIF (1 << 31) /* it needs RSND_SSI_DEPENDENT */ +#define RSND_SCU_USE_HPBIF (1 << 31) /* it needs RSND_SSI_DEPENDENT */ struct rsnd_scu_platform_info { u32 flags; diff --git 
a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index e5d09d242ba3..a12589c4ee92 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -6,13 +6,13 @@ struct iscsit_transport { #define ISCSIT_TRANSPORT_NAME 16 char name[ISCSIT_TRANSPORT_NAME]; int transport_type; + int priv_size; struct module *owner; struct list_head t_node; int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); void (*iscsit_free_conn)(struct iscsi_conn *); - struct iscsi_cmd *(*iscsit_alloc_cmd)(struct iscsi_conn *, gfp_t); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); int (*iscsit_immediate_queue)(struct iscsi_conn *, struct iscsi_cmd *, int); @@ -22,6 +22,11 @@ struct iscsit_transport { int (*iscsit_queue_status)(struct iscsi_conn *, struct iscsi_cmd *); }; +static inline void *iscsit_priv_cmd(struct iscsi_cmd *cmd) +{ + return (void *)(cmd + 1); +} + /* * From iscsi_target_transport.c */ @@ -92,3 +97,4 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *, __be32); +extern void iscsit_release_cmd(struct iscsi_cmd *); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index ffa2696d64dc..5ebe21cd5d1c 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -39,7 +39,8 @@ struct se_subsystem_api { }; struct sbc_ops { - sense_reason_t (*execute_rw)(struct se_cmd *cmd); + sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *, + u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); 
sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); @@ -73,6 +74,10 @@ int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); void transport_kunmap_data_sg(struct se_cmd *); +/* core helpers also used by xcopy during internal command setup */ +int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); +sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, + struct scatterlist *, u32, struct scatterlist *, u32); void array_free(void *array, int n); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e34fc904f2e1..5bdb8b7d2a69 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -5,11 +5,12 @@ #include <linux/configfs.h> #include <linux/dma-mapping.h> #include <linux/blkdev.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_cmnd.h> #include <net/sock.h> #include <net/tcp.h> -#define TARGET_CORE_MOD_VERSION "v4.1.0-rc2-ml" +#define TARGET_CORE_MOD_VERSION "v4.1.0" #define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION /* Maximum Number of LUNs per Target Portal Group */ @@ -96,6 +97,10 @@ * block/blk-lib.c:blkdev_issue_discard() */ #define DA_EMULATE_TPWS 0 +/* Emulation for CompareAndWrite (AtomicTestandSet) by default */ +#define DA_EMULATE_CAW 1 +/* Emulation for 3rd Party Copy (ExtendedCopy) by default */ +#define DA_EMULATE_3PC 1 /* No Emulation for PSCSI by default */ #define DA_EMULATE_ALUA 0 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ @@ -158,6 +163,9 @@ enum se_cmd_flags_table { SCF_ALUA_NON_OPTIMIZED = 0x00008000, SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_ACK_KREF = 0x00040000, + SCF_COMPARE_AND_WRITE = 0x00080000, + SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_CMD_XCOPY_PASSTHROUGH = 0x00200000, }; /* struct se_dev_entry->lun_flags and struct 
se_lun->lun_access */ @@ -196,6 +204,7 @@ enum tcm_sense_reason_table { TCM_ADDRESS_OUT_OF_RANGE = R(0x11), TCM_OUT_OF_RESOURCES = R(0x12), TCM_PARAMETER_LIST_LENGTH_ERROR = R(0x13), + TCM_MISCOMPARE_VERIFY = R(0x14), #undef R }; @@ -415,6 +424,8 @@ struct se_cmd { enum dma_data_direction data_direction; /* For SAM Task Attribute */ int sam_task_attr; + /* Used for se_sess->sess_tag_pool */ + unsigned int map_tag; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; unsigned cmd_wait_set:1; @@ -444,11 +455,14 @@ struct se_cmd { struct kref cmd_kref; struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - void (*transport_complete_callback)(struct se_cmd *); + sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, + u32, enum dma_data_direction); + sense_reason_t (*transport_complete_callback)(struct se_cmd *); unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; unsigned long long t_task_lba; + unsigned int t_task_nolb; unsigned int transport_state; #define CMD_T_ABORTED (1 << 0) #define CMD_T_ACTIVE (1 << 1) @@ -469,7 +483,9 @@ struct se_cmd { struct work_struct work; struct scatterlist *t_data_sg; + struct scatterlist *t_data_sg_orig; unsigned int t_data_nents; + unsigned int t_data_nents_orig; void *t_data_vmap; struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; @@ -536,6 +552,8 @@ struct se_session { struct list_head sess_wait_list; spinlock_t sess_cmd_lock; struct kref sess_kref; + void *sess_cmd_map; + struct percpu_ida sess_tag_pool; }; struct se_device; @@ -589,6 +607,8 @@ struct se_dev_attrib { int emulate_tas; int emulate_tpu; int emulate_tpws; + int emulate_caw; + int emulate_3pc; int enforce_pr_isids; int is_nonrot; int emulate_rest_reord; @@ -656,6 +676,7 @@ struct se_device { spinlock_t se_port_lock; spinlock_t se_tmr_lock; spinlock_t qf_cmd_lock; + struct semaphore caw_sem; /* Used for legacy SPC-2 reservationsa */ 
struct se_node_acl *dev_reserved_node_acl; /* Used for ALUA Logical Unit Group membership */ @@ -669,6 +690,7 @@ struct se_device { struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; + struct list_head g_dev_node; /* Pointer to associated SE HBA */ struct se_hba *se_hba; /* T10 Inquiry and VPD WWN Information */ diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 7a16178424f9..882b650e32be 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -84,6 +84,9 @@ struct target_core_fabric_ops { }; struct se_session *transport_init_session(void); +int transport_alloc_session_tags(struct se_session *, unsigned int, + unsigned int); +struct se_session *transport_init_session_tags(unsigned int, unsigned int); void __transport_register_session(struct se_portal_group *, struct se_node_acl *, struct se_session *, void *); void transport_register_session(struct se_portal_group *, @@ -131,6 +134,7 @@ int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t); void core_tmr_release_req(struct se_tmr_req *); int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); +void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u32); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, @@ -175,4 +179,30 @@ u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl * char *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *, u32 *, char **); +/* + * The LIO target core uses DMA_TO_DEVICE to mean that data is going + * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean + * that data is coming from the target (eg handling a READ). 
However, + * this is just the opposite of what we have to tell the DMA mapping + * layer -- eg when handling a READ, the HBA will have to DMA the data + * out of memory so it can send it to the initiator, which means we + * need to use DMA_TO_DEVICE when we map the data. + */ +static inline enum dma_data_direction +target_reverse_dma_direction(struct se_cmd *se_cmd) +{ + if (se_cmd->se_cmd_flags & SCF_BIDI) + return DMA_BIDIRECTIONAL; + + switch (se_cmd->data_direction) { + case DMA_TO_DEVICE: + return DMA_FROM_DEVICE; + case DMA_FROM_DEVICE: + return DMA_TO_DEVICE; + case DMA_NONE: + default: + return DMA_NONE; + } +} + #endif /* TARGET_CORE_FABRICH */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2902657ba766..45702c3c3837 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -439,7 +439,7 @@ TRACE_EVENT(btrfs_sync_fs, { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) -TRACE_EVENT(btrfs_delayed_tree_ref, +DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_tree_ref *full_ref, @@ -481,7 +481,25 @@ TRACE_EVENT(btrfs_delayed_tree_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_data_ref, +DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_data_ref *full_ref, @@ -527,7 +545,25 @@ TRACE_EVENT(btrfs_delayed_data_ref, (unsigned long long)__entry->seq) ); -TRACE_EVENT(btrfs_delayed_ref_head, +DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, + + 
TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action) +); + +DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, TP_PROTO(struct btrfs_delayed_ref_node *ref, struct btrfs_delayed_ref_head *head_ref, @@ -556,6 +592,24 @@ TRACE_EVENT(btrfs_delayed_ref_head, __entry->is_data) ); +DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + +DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action) +); + #define show_chunk_type(type) \ __print_flags(type, "|", \ { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 63cfcccaebb3..132a985aba8b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start, TP_fast_assign( __entry->shr = shr; - __entry->shrink = shr->shrink; + __entry->shrink = shr->scan_objects; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; __entry->pgs_scanned = pgs_scanned; @@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end, TP_fast_assign( __entry->shr = shr; - __entry->shrink = shr->shrink; + __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 05aed70627e2..45e618921c61 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ 
-305,6 +305,31 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_DEFRAG_RANGE_COMPRESS 1 #define BTRFS_DEFRAG_RANGE_START_IO 2 +#define BTRFS_SAME_DATA_DIFFERS 1 +/* For extent-same ioctl */ +struct btrfs_ioctl_same_extent_info { + __s64 fd; /* in - destination file */ + __u64 logical_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file */ + /* status of this dedupe operation: + * 0 if dedup succeeds + * < 0 for error + * == BTRFS_SAME_DATA_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; +}; + +struct btrfs_ioctl_same_args { + __u64 logical_offset; /* in - start of extent in source */ + __u64 length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; + __u32 reserved2; + struct btrfs_ioctl_same_extent_info info[0]; +}; + struct btrfs_ioctl_space_info { __u64 flags; __u64 total_bytes; @@ -524,7 +549,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_search_args) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ struct btrfs_ioctl_space_args) #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) @@ -579,4 +604,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ struct btrfs_ioctl_dev_replace_args) +#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \ + struct btrfs_ioctl_same_args) + #endif /* _UAPI_LINUX_BTRFS_H */ diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h new file mode 100644 index 000000000000..d7e4c6ce6171 --- 
/dev/null +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -0,0 +1,27 @@ +/* + * include/uapi/linux/cifs/cifs_mount.h + * + * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + */ +#ifndef _CIFS_MOUNT_H +#define _CIFS_MOUNT_H + +/* Max string lengths for cifs mounting options. */ +#define CIFS_MAX_DOMAINNAME_LEN 256 /* max fully qualified domain name */ +#define CIFS_MAX_USERNAME_LEN 256 /* reasonable max for current servers */ +#define CIFS_MAX_PASSWORD_LEN 512 /* Windows max seems to be 256 wide chars */ +#define CIFS_MAX_SHARE_LEN 256 /* reasonable max share name length */ +#define CIFS_NI_MAXHOST 1024 /* max host name length (256 * 4 bytes) */ + + +#endif /* _CIFS_MOUNT_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..6c28b61bb690 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -49,9 +49,9 @@ struct files_stat_struct { }; struct inodes_stat_t { - int nr_inodes; - int nr_unused; - int dummy[5]; /* padding for sysctl ABI compatibility */ + long nr_inodes; + long nr_unused; + long dummy[5]; /* padding for sysctl ABI compatibility */ }; diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index d08abf9101d2..a3726275876d 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -152,6 +152,7 @@ struct input_keymap_entry { #define EVIOCGEFFECTS _IOR('E', 0x84, int) /* Report number of effects playable at the same time */ #define EVIOCGRAB _IOW('E', 
0x90, int) /* Grab/Release device */ +#define EVIOCREVOKE _IOW('E', 0x91, int) /* Revoke device access */ #define EVIOCSCLOCKID _IOW('E', 0xa0, int) /* Set clockid to be used for timestamps */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index ca1d90bcb74d..40a1fb807396 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -324,7 +324,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, u64 *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/init/Kconfig b/init/Kconfig index 18bd9e3d3274..3ecd8a1178f1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1602,7 +1602,7 @@ endchoice config SLUB_CPU_PARTIAL default y - depends on SLUB + depends on SLUB && SMP bool "SLUB per cpu partial cache" help Per cpu partial caches accellerate objects allocation and freeing diff --git a/kernel/Makefile b/kernel/Makefile index 35ef1185e359..1ce47553fb02 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -26,6 +26,7 @@ obj-y += sched/ obj-y += power/ obj-y += printk/ obj-y += cpu/ +obj-y += irq/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -79,7 +80,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o diff --git a/kernel/events/core.c b/kernel/events/core.c index 2207efc941d1..dd236b66ca3a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5039,6 +5039,7 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.header.size += 
sizeof(mmap_event->maj); mmap_event->event_id.header.size += sizeof(mmap_event->min); mmap_event->event_id.header.size += sizeof(mmap_event->ino); + mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f3569747d629..ad8e1bdca70e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs) tmp = ri; ri = ri->next; kfree(tmp); + utask->depth--; if (!chained) break; - - utask->depth--; - BUG_ON(!ri); } diff --git a/kernel/fork.c b/kernel/fork.c index 81ccb4f010c2..086fe73ad6bd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -519,7 +519,7 @@ static void mm_init_aio(struct mm_struct *mm) { #ifdef CONFIG_AIO spin_lock_init(&mm->ioctx_lock); - INIT_HLIST_HEAD(&mm->ioctx_list); + mm->ioctx_table = NULL; #endif } diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 9bd0934f6c33..7a7d2ee96d42 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -74,7 +74,7 @@ static int __init gcov_persist_setup(char *str) { unsigned long val; - if (strict_strtoul(str, 0, &val)) { + if (kstrtoul(str, 0, &val)) { pr_warning("invalid gcov_persist parameter '%s'\n", str); return 0; } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d1a758bc972a..4a1fef09f658 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -1,15 +1,4 @@ -# Select this to activate the generic irq options below -config HAVE_GENERIC_HARDIRQS - bool - -if HAVE_GENERIC_HARDIRQS menu "IRQ subsystem" -# -# Interrupt subsystem related configuration options -# -config GENERIC_HARDIRQS - def_bool y - # Options selectable by the architecture code # Make sparse irq Kconfig switch below available @@ -84,4 +73,3 @@ config SPARSE_IRQ If you don't know what to do here, say N. 
endmenu -endif diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 6ada93c23a9a..9659d38e008f 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -113,7 +113,7 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj, unsigned long cnt; int ret; - if (strict_strtoul(buf, 0, &cnt)) + if (kstrtoul(buf, 0, &cnt)) return -EINVAL; ret = crash_shrink_memory(cnt); diff --git a/kernel/params.c b/kernel/params.c index 501bde4f3bee..81c4e78c8f4c 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -253,13 +253,13 @@ int parse_args(const char *doing, EXPORT_SYMBOL(param_ops_##name) -STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); -STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol); -STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); -STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); +STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtol); +STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(int, int, "%i", long, kstrtol); +STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(long, long, "%li", long, kstrtol); +STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul); int param_set_charp(const char *val, const struct kernel_param *kp) { diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 3085e62a80a5..c9c759d5a15c 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -644,22 +644,23 @@ int hibernate(void) if (error) goto Exit; - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Exit; - printk(KERN_INFO "PM:
Syncing filesystems ... "); sys_sync(); printk("done.\n"); error = freeze_processes(); if (error) - goto Free_bitmaps; + goto Exit; + + lock_device_hotplug(); + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error || freezer_test_done) - goto Thaw; + goto Free_bitmaps; if (in_suspend) { unsigned int flags = 0; @@ -682,14 +683,14 @@ int hibernate(void) pr_debug("PM: Image restored successfully.\n"); } + Free_bitmaps: + free_basic_memory_bitmaps(); Thaw: + unlock_device_hotplug(); thaw_processes(); /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; - - Free_bitmaps: - free_basic_memory_bitmaps(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -806,21 +807,20 @@ static int software_resume(void) pm_prepare_console(); error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - goto close_finish; - - error = create_basic_memory_bitmaps(); - if (error) - goto close_finish; + goto Close_Finish; pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); - if (error) { - swsusp_close(FMODE_READ); - goto Done; - } + if (error) + goto Close_Finish; pr_debug("PM: Loading hibernation image.\n"); + lock_device_hotplug(); + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; + error = swsusp_read(&flags); swsusp_close(FMODE_READ); if (!error) @@ -828,9 +828,10 @@ static int software_resume(void) printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); swsusp_free(); - thaw_processes(); - Done: free_basic_memory_bitmaps(); + Thaw: + unlock_device_hotplug(); + thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); @@ -840,7 +841,7 @@ static int software_resume(void) mutex_unlock(&pm_mutex); pr_debug("PM: Hibernation image not present or could not be loaded.\n"); return error; -close_finish: + 
Close_Finish: swsusp_close(FMODE_READ); goto Finish; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 4ed81e74f86f..72e8f4fd616d 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -60,11 +60,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) error = -ENOSYS; goto Unlock; } - if(create_basic_memory_bitmaps()) { - atomic_inc(&snapshot_device_available); - error = -ENOMEM; - goto Unlock; - } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -90,10 +85,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) { - free_basic_memory_bitmaps(); + if (error) atomic_inc(&snapshot_device_available); - } + data->frozen = 0; data->ready = 0; data->platform_support = 0; @@ -111,11 +105,11 @@ static int snapshot_release(struct inode *inode, struct file *filp) lock_system_sleep(); swsusp_free(); - free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); } pm_notifier_call_chain(data->mode == O_RDONLY ? 
@@ -207,6 +201,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!mutex_trylock(&pm_mutex)) return -EBUSY; + lock_device_hotplug(); data = filp->private_data; switch (cmd) { @@ -220,14 +215,22 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, printk("done.\n"); error = freeze_processes(); - if (!error) + if (error) + break; + + error = create_basic_memory_bitmaps(); + if (error) + thaw_processes(); + else data->frozen = 1; + break; case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); data->frozen = 0; break; @@ -371,6 +374,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } + unlock_device_hotplug(); mutex_unlock(&pm_mutex); return error; diff --git a/kernel/res_counter.c b/kernel/res_counter.c index ff55247e7049..4aa8a305aede 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -17,8 +17,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) { spin_lock_init(&counter->lock); - counter->limit = RESOURCE_MAX; - counter->soft_limit = RESOURCE_MAX; + counter->limit = RES_COUNTER_MAX; + counter->soft_limit = RES_COUNTER_MAX; counter->parent = parent; } @@ -178,23 +178,30 @@ u64 res_counter_read_u64(struct res_counter *counter, int member) #endif int res_counter_memparse_write_strategy(const char *buf, - unsigned long long *res) + unsigned long long *resp) { char *end; + unsigned long long res; - /* return RESOURCE_MAX(unlimited) if "-1" is specified */ + /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */ if (*buf == '-') { - *res = simple_strtoull(buf + 1, &end, 10); - if (*res != 1 || *end != '\0') + res = simple_strtoull(buf + 1, &end, 10); + if (res != 1 || *end != '\0') return -EINVAL; - *res = RESOURCE_MAX; + *resp = RES_COUNTER_MAX; return 0; } - *res = memparse(buf, &end); + res = memparse(buf, &end); if (*end != '\0') return -EINVAL; - *res = PAGE_ALIGN(*res); + if 
(PAGE_ALIGN(res) >= res) + res = PAGE_ALIGN(res); + else + res = RES_COUNTER_MAX; + + *resp = res; + return 0; } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e076bddd4c66..196559994f7c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -124,7 +124,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, " "); SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ", - p->comm, p->pid, + p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); @@ -289,7 +289,7 @@ do { \ P(nr_load_updates); P(nr_uninterruptible); PN(next_balance); - P(curr->pid); + SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); P(cpu_load[0]); P(cpu_load[1]); @@ -492,7 +492,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) { unsigned long nr_switches; - SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, + SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p), get_nr_threads(p)); SEQ_printf(m, "---------------------------------------------------------" diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7f0a5e6cdae0..11cd13667359 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5151,7 +5151,7 @@ static int should_we_balance(struct lb_env *env) * First idle cpu or the first cpu(busiest) in this sched group * is eligible for doing load balancing at this and above domains. */ - return balance_cpu != env->dst_cpu; + return balance_cpu == env->dst_cpu; } /* @@ -5928,11 +5928,15 @@ static void task_fork_fair(struct task_struct *p) cfs_rq = task_cfs_rq(current); curr = cfs_rq->curr; - if (unlikely(task_cpu(p) != this_cpu)) { - rcu_read_lock(); - __set_task_cpu(p, this_cpu); - rcu_read_unlock(); - } + /* + * Not only the cpu but also the task_group of the parent might have + * been changed after parent->se.parent,cfs_rq were copied to + * child->se.parent,cfs_rq. 
So call __set_task_cpu() to make those + * of child point to valid ones. + */ + rcu_read_lock(); + __set_task_cpu(p, this_cpu); + rcu_read_unlock(); update_curr(cfs_rq); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 5aef494fc8b4..c7edee71bce8 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -104,8 +104,9 @@ static inline void sched_info_queued(struct task_struct *t) } /* - * Called when a process ceases being the active-running process, either - * voluntarily or involuntarily. Now we can calculate how long we ran. + * Called when a process ceases being the active-running process involuntarily + * due, typically, to expiring its time slice (this may also be called when + * switching to the idle task). Now we can calculate how long we ran. * Also, if the process is still in the TASK_RUNNING state, call * sched_info_queued() to mark that it has now again started waiting on * the runqueue. diff --git a/kernel/softirq.c b/kernel/softirq.c index be3d3514c325..53cc09ceb0b8 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -876,7 +876,6 @@ int __init __weak early_irq_init(void) return 0; } -#ifdef CONFIG_GENERIC_HARDIRQS int __init __weak arch_probe_nr_irqs(void) { return NR_IRQS_LEGACY; @@ -886,4 +885,3 @@ int __init __weak arch_early_irq_init(void) { return 0; } -#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dc69093a8ec4..b2f06f3c6a3f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { { .procname = "inode-nr", .data = &inodes_stat, - .maxlen = 2*sizeof(int), + .maxlen = 2*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, - .maxlen = 7*sizeof(int), + .maxlen = 7*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, @@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { { .procname = "dentry-state", .data = &dentry_stat, - .maxlen = 6*sizeof(int), + .maxlen = 6*sizeof(long), .mode = 
0444, .proc_handler = proc_nr_dentry, }, diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8f5b3b98577b..bb2215174f05 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work) schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); } -static void notify_cmos_timer(void) +void ntp_notify_cmos_timer(void) { schedule_delayed_work(&sync_cmos_work, 0); } #else -static inline void notify_cmos_timer(void) { } +void ntp_notify_cmos_timer(void) { } #endif @@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) if (!(time_status & STA_NANO)) txc->time.tv_usec /= NSEC_PER_USEC; - notify_cmos_timer(); - return result; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 48b9fffabdc2..947ba25a95a0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc) write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + ntp_notify_cmos_timer(); + return ret; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c9eef36739a9..06344d986eb9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -597,7 +597,7 @@ endmenu # "Memory Debugging" config DEBUG_SHIRQ bool "Debug shared IRQ handlers" - depends on DEBUG_KERNEL && GENERIC_HARDIRQS + depends on DEBUG_KERNEL help Enable this to generate a spurious interrupt as soon as a shared interrupt handler is registered, and just before one is deregistered. 
diff --git a/lib/Makefile b/lib/Makefile index f2cb3082697c..f3bb2cb98adf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o + earlycpio.o percpu-refcount.o percpu_ida.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -25,7 +25,8 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o + bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ + percpu_ida.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 5fbed5caba6e..4f134d8907a7 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -8,9 +8,7 @@ */ #include <linux/cpu_rmap.h> -#ifdef CONFIG_GENERIC_HARDIRQS #include <linux/interrupt.h> -#endif #include <linux/export.h> /* @@ -213,8 +211,6 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, } EXPORT_SYMBOL(cpu_rmap_update); -#ifdef CONFIG_GENERIC_HARDIRQS - /* Glue between IRQ affinity notifiers and CPU rmaps */ struct irq_glue { @@ -309,5 +305,3 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq) return rc; } EXPORT_SYMBOL(irq_cpu_rmap_add); - -#endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 43bc5b071f96..dfe6ec17c0a5 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -14,8 +14,10 @@ #include <linux/err.h> #include <linux/init.h> #include <crypto/hash.h> +#include <linux/static_key.h> static struct crypto_shash 
*crct10dif_tfm; +static struct static_key crct10dif_fallback __read_mostly; __u16 crc_t10dif(const unsigned char *buffer, size_t len) { @@ -25,6 +27,9 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len) } desc; int err; + if (static_key_false(&crct10dif_fallback)) + return crc_t10dif_generic(0, buffer, len); + desc.shash.tfm = crct10dif_tfm; desc.shash.flags = 0; *(__u16 *)desc.ctx = 0; @@ -39,7 +44,11 @@ EXPORT_SYMBOL(crc_t10dif); static int __init crc_t10dif_mod_init(void) { crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); - return PTR_RET(crct10dif_tfm); + if (IS_ERR(crct10dif_tfm)) { + static_key_slow_inc(&crct10dif_fallback); + crct10dif_tfm = NULL; + } + return 0; } static void __exit crc_t10dif_mod_fini(void) diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c new file mode 100644 index 000000000000..bab1ba2a4c71 --- /dev/null +++ b/lib/percpu_ida.c @@ -0,0 +1,335 @@ +/* + * Percpu IDA library + * + * Copyright (C) 2013 Datera, Inc. Kent Overstreet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/hardirq.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/percpu_ida.h> + +/* + * Number of tags we move between the percpu freelist and the global freelist at + * a time + */ +#define IDA_PCPU_BATCH_MOVE 32U + +/* Max size of percpu freelist, */ +#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) + +struct percpu_ida_cpu { + /* + * Even though this is percpu, we need a lock for tag stealing by remote + * CPUs: + */ + spinlock_t lock; + + /* nr_free/freelist form a stack of free IDs */ + unsigned nr_free; + unsigned freelist[]; +}; + +static inline void move_tags(unsigned *dst, unsigned *dst_nr, + unsigned *src, unsigned *src_nr, + unsigned nr) +{ + *src_nr -= nr; + memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); + *dst_nr += nr; +} + +/* + * Try to steal tags from a remote cpu's percpu freelist. + * + * We first check how many percpu freelists have tags - we don't steal tags + * unless enough percpu freelists have tags on them that it's possible more than + * half the total tags could be stuck on remote percpu freelists. + * + * Then we iterate through the cpus until we find some tags - we don't attempt + * to find the "best" cpu to steal from, to keep cacheline bouncing to a + * minimum. 
+ */ +static inline void steal_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; + struct percpu_ida_cpu *remote; + + for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); + cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; + cpus_have_tags--) { + cpu = cpumask_next(cpu, &pool->cpus_have_tags); + + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(&pool->cpus_have_tags); + if (cpu >= nr_cpu_ids) + BUG(); + } + + pool->cpu_last_stolen = cpu; + remote = per_cpu_ptr(pool->tag_cpu, cpu); + + cpumask_clear_cpu(cpu, &pool->cpus_have_tags); + + if (remote == tags) + continue; + + spin_lock(&remote->lock); + + if (remote->nr_free) { + memcpy(tags->freelist, + remote->freelist, + sizeof(unsigned) * remote->nr_free); + + tags->nr_free = remote->nr_free; + remote->nr_free = 0; + } + + spin_unlock(&remote->lock); + + if (tags->nr_free) + break; + } +} + +/* + * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto + * our percpu freelist: + */ +static inline void alloc_global_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + move_tags(tags->freelist, &tags->nr_free, + pool->freelist, &pool->nr_free, + min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); +} + +static inline unsigned alloc_local_tag(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + int tag = -ENOSPC; + + spin_lock(&tags->lock); + if (tags->nr_free) + tag = tags->freelist[--tags->nr_free]; + spin_unlock(&tags->lock); + + return tag; +} + +/** + * percpu_ida_alloc - allocate a tag + * @pool: pool to allocate from + * @gfp: gfp flags + * + * Returns a tag - an integer in the range [0..nr_tags) (passed to + * tag_pool_init()), or otherwise -ENOSPC on allocation failure. + * + * Safe to be called from interrupt context (assuming it isn't passed + * __GFP_WAIT, of course). 
+ * + * @gfp indicates whether or not to wait until a free id is available (it's not + * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep + * however long it takes until another thread frees an id (same semantics as a + * mempool). + * + * Will not fail if passed __GFP_WAIT. + */ +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +{ + DEFINE_WAIT(wait); + struct percpu_ida_cpu *tags; + unsigned long flags; + int tag; + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + /* Fastpath */ + tag = alloc_local_tag(pool, tags); + if (likely(tag >= 0)) { + local_irq_restore(flags); + return tag; + } + + while (1) { + spin_lock(&pool->lock); + + /* + * prepare_to_wait() must come before steal_tags(), in case + * percpu_ida_free() on another cpu flips a bit in + * cpus_have_tags + * + * global lock held and irqs disabled, don't need percpu lock + */ + prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + + if (!tags->nr_free) + alloc_global_tags(pool, tags); + if (!tags->nr_free) + steal_tags(pool, tags); + + if (tags->nr_free) { + tag = tags->freelist[--tags->nr_free]; + if (tags->nr_free) + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + } + + spin_unlock(&pool->lock); + local_irq_restore(flags); + + if (tag >= 0 || !(gfp & __GFP_WAIT)) + break; + + schedule(); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + } + + finish_wait(&pool->wait, &wait); + return tag; +} +EXPORT_SYMBOL_GPL(percpu_ida_alloc); + +/** + * percpu_ida_free - free a tag + * @pool: pool @tag was allocated from + * @tag: a tag previously allocated with percpu_ida_alloc() + * + * Safe to be called from interrupt context. 
+ */ +void percpu_ida_free(struct percpu_ida *pool, unsigned tag) +{ + struct percpu_ida_cpu *tags; + unsigned long flags; + unsigned nr_free; + + BUG_ON(tag >= pool->nr_tags); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + spin_lock(&tags->lock); + tags->freelist[tags->nr_free++] = tag; + + nr_free = tags->nr_free; + spin_unlock(&tags->lock); + + if (nr_free == 1) { + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + wake_up(&pool->wait); + } + + if (nr_free == IDA_PCPU_SIZE) { + spin_lock(&pool->lock); + + /* + * Global lock held and irqs disabled, don't need percpu + * lock + */ + if (tags->nr_free == IDA_PCPU_SIZE) { + move_tags(pool->freelist, &pool->nr_free, + tags->freelist, &tags->nr_free, + IDA_PCPU_BATCH_MOVE); + + wake_up(&pool->wait); + } + spin_unlock(&pool->lock); + } + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(percpu_ida_free); + +/** + * percpu_ida_destroy - release a tag pool's resources + * @pool: pool to free + * + * Frees the resources allocated by percpu_ida_init(). + */ +void percpu_ida_destroy(struct percpu_ida *pool) +{ + free_percpu(pool->tag_cpu); + free_pages((unsigned long) pool->freelist, + get_order(pool->nr_tags * sizeof(unsigned))); +} +EXPORT_SYMBOL_GPL(percpu_ida_destroy); + +/** + * percpu_ida_init - initialize a percpu tag pool + * @pool: pool to initialize + * @nr_tags: number of tags that will be available for allocation + * + * Initializes @pool so that it can be used to allocate tags - integers in the + * range [0, nr_tags). Typically, they'll be used by driver code to refer to a + * preallocated array of tag structures. + * + * Allocation is percpu, but sharding is limited by nr_tags - for best + * performance, the workload should not span more cpus than nr_tags / 128. 
+ */ +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +{ + unsigned i, cpu, order; + + memset(pool, 0, sizeof(*pool)); + + init_waitqueue_head(&pool->wait); + spin_lock_init(&pool->lock); + pool->nr_tags = nr_tags; + + /* Guard against overflow */ + if (nr_tags > (unsigned) INT_MAX + 1) { + pr_err("percpu_ida_init(): nr_tags too large\n"); + return -EINVAL; + } + + order = get_order(nr_tags * sizeof(unsigned)); + pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); + if (!pool->freelist) + return -ENOMEM; + + for (i = 0; i < nr_tags; i++) + pool->freelist[i] = i; + + pool->nr_free = nr_tags; + + pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + + IDA_PCPU_SIZE * sizeof(unsigned), + sizeof(unsigned)); + if (!pool->tag_cpu) + goto err; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); + + return 0; +err: + percpu_ida_destroy(pool); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(percpu_ida_init); diff --git a/mm/Kconfig b/mm/Kconfig index 6cdd27043303..026771a9b097 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -245,7 +245,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA + depends on (NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. 
This is useful in @@ -480,7 +480,7 @@ config FRONTSWAP config CMA bool "Contiguous Memory Allocator" - depends on HAVE_MEMBLOCK + depends on HAVE_MEMBLOCK && MMU select MIGRATION select MEMORY_ISOLATION help diff --git a/mm/Makefile b/mm/Makefile index f00803386a67..305d10acd081 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -17,7 +17,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o balloon_compaction.o \ - interval_tree.o $(mmu-y) + interval_tree.o list_lru.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/filemap.c b/mm/filemap.c index e607728db4a8..1e6aec4a2d2e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) - goto out; + return error; error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); - if (error == 0) { - page_cache_get(page); - page->mapping = mapping; - page->index = offset; - - spin_lock_irq(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); - if (likely(!error)) { - mapping->nrpages++; - __inc_zone_page_state(page, NR_FILE_PAGES); - spin_unlock_irq(&mapping->tree_lock); - trace_mm_filemap_add_to_page_cache(page); - } else { - page->mapping = NULL; - /* Leave page->index set: truncation relies upon it */ - spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_uncharge_cache_page(page); - page_cache_release(page); - } - radix_tree_preload_end(); - } else + if (error) { mem_cgroup_uncharge_cache_page(page); -out: + return error; + } + + page_cache_get(page); + page->mapping = mapping; + page->index = offset; + + spin_lock_irq(&mapping->tree_lock); + error = radix_tree_insert(&mapping->page_tree, offset, page); + radix_tree_preload_end(); + if (unlikely(error)) + goto err_insert; + mapping->nrpages++; + 
__inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); + trace_mm_filemap_add_to_page_cache(page); + return 0; +err_insert: + page->mapping = NULL; + /* Leave page->index set: truncation relies upon it */ + spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); + page_cache_release(page); return error; } EXPORT_SYMBOL(add_to_page_cache_locked); @@ -1614,6 +1616,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; struct page *page; + bool memcg_oom; pgoff_t size; int ret = 0; @@ -1622,7 +1625,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* - * Do we have something in the page cache already? + * Do we have something in the page cache already? Either + * way, try readahead, but disable the memcg OOM killer for it + * as readahead is optional and no errors are propagated up + * the fault stack. The OOM killer is enabled while trying to + * instantiate the faulting page individually below. */ page = find_get_page(mapping, offset); if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { @@ -1630,10 +1637,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * We found the page, so try async readahead before * waiting for the lock. 
*/ + memcg_oom = mem_cgroup_toggle_oom(false); do_async_mmap_readahead(vma, ra, file, page, offset); + mem_cgroup_toggle_oom(memcg_oom); } else if (!page) { /* No page in the page cache at all */ + memcg_oom = mem_cgroup_toggle_oom(false); do_sync_mmap_readahead(vma, ra, file, offset); + mem_cgroup_toggle_oom(memcg_oom); count_vm_event(PGMAJFAULT); mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 963e14c0486f..7489884682d8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -211,24 +211,29 @@ static void put_huge_zero_page(void) BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); } -static int shrink_huge_zero_page(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, + struct shrink_control *sc) { - if (!sc->nr_to_scan) - /* we can free zero page only if last reference remains */ - return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0; + /* we can free zero page only if last reference remains */ + return atomic_read(&huge_zero_refcount) == 1 ? 
HPAGE_PMD_NR : 0; +} +static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); __free_page(zero_page); + return HPAGE_PMD_NR; } return 0; } static struct shrinker huge_zero_page_shrinker = { - .shrink = shrink_huge_zero_page, + .count_objects = shrink_huge_zero_page_count, + .scan_objects = shrink_huge_zero_page_scan, .seeks = DEFAULT_SEEKS, }; @@ -690,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma) +static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) { pmd_t entry; - entry = mk_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = mk_pmd(page, prot); entry = pmd_mkhuge(entry); return entry; } @@ -727,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pte_free(mm, pgtable); } else { pmd_t entry; - entry = mk_huge_pmd(page, vma); + entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); @@ -783,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; - pte_t *pte; - if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) { - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + return VM_FAULT_FALLBACK; + if (unlikely(anon_vma_prepare(vma))) + return VM_FAULT_OOM; + if (unlikely(khugepaged_enter(vma))) + return VM_FAULT_OOM; + if (!(flags & FAULT_FLAG_WRITE) && + 
transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; + bool set; + pgtable = pte_alloc_one(mm, haddr); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE) && - transparent_hugepage_use_zero_page()) { - pgtable_t pgtable; - struct page *zero_page; - bool set; - pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) - return VM_FAULT_OOM; - zero_page = get_huge_zero_page(); - if (unlikely(!zero_page)) { - pte_free(mm, pgtable); - count_vm_event(THP_FAULT_FALLBACK); - goto out; - } - spin_lock(&mm->page_table_lock); - set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_page); - spin_unlock(&mm->page_table_lock); - if (!set) { - pte_free(mm, pgtable); - put_huge_zero_page(); - } - return 0; - } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) { + zero_page = get_huge_zero_page(); + if (unlikely(!zero_page)) { + pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); - goto out; + return VM_FAULT_FALLBACK; } - count_vm_event(THP_FAULT_ALLOC); - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { - put_page(page); - goto out; - } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, - page))) { - mem_cgroup_uncharge_page(page); - put_page(page); - goto out; + spin_lock(&mm->page_table_lock); + set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, + zero_page); + spin_unlock(&mm->page_table_lock); + if (!set) { + pte_free(mm, pgtable); + put_huge_zero_page(); } - return 0; } -out: - /* - * Use __pte_alloc instead of pte_alloc_map, because we can't - * run pte_offset_map on the pmd, if an huge pmd could - * materialize from under us from a different thread. 
- */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) - return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) - return 0; - /* - * A regular pmd is established and it can't morph into a huge pmd - * from under us anymore at this point because we hold the mmap_sem - * read mode and khugepaged takes it in write mode. So now it's - * safe to run pte_offset_map(). - */ - pte = pte_offset_map(pmd, address); - return handle_pte_fault(mm, vma, address, pte, pmd, flags); + page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), + vma, haddr, numa_node_id(), 0); + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } + + count_vm_event(THP_FAULT_ALLOC); + return 0; } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -1165,7 +1150,6 @@ alloc: new_page = NULL; if (unlikely(!new_page)) { - count_vm_event(THP_FAULT_FALLBACK); if (is_huge_zero_pmd(orig_pmd)) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); @@ -1176,9 +1160,9 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); goto out; } - count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1186,10 +1170,13 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); ret |= VM_FAULT_OOM; goto out; } + count_vm_event(THP_FAULT_ALLOC); + if (is_huge_zero_pmd(orig_pmd)) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else @@ -1210,7 +1197,8 @@ 
alloc: goto out_mn; } else { pmd_t entry; - entry = mk_huge_pmd(new_page, vma); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); @@ -1661,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page, BUG_ON(atomic_read(&page->_count) <= 0); __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); - __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); compound_unlock(page); @@ -2359,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm, __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); - _pmd = mk_huge_pmd(new_page, vma); + _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so diff --git a/mm/list_lru.c b/mm/list_lru.c new file mode 100644 index 000000000000..72467914b856 --- /dev/null +++ b/mm/list_lru.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. 
+ * Authors: David Chinner and Glauber Costa + * + * Generic LRU infrastructure + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/list_lru.h> +#include <linux/slab.h> + +bool list_lru_add(struct list_lru *lru, struct list_head *item) +{ + int nid = page_to_nid(virt_to_page(item)); + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + WARN_ON_ONCE(nlru->nr_items < 0); + if (list_empty(item)) { + list_add_tail(item, &nlru->list); + if (nlru->nr_items++ == 0) + node_set(nid, lru->active_nodes); + spin_unlock(&nlru->lock); + return true; + } + spin_unlock(&nlru->lock); + return false; +} +EXPORT_SYMBOL_GPL(list_lru_add); + +bool list_lru_del(struct list_lru *lru, struct list_head *item) +{ + int nid = page_to_nid(virt_to_page(item)); + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + if (!list_empty(item)) { + list_del_init(item); + if (--nlru->nr_items == 0) + node_clear(nid, lru->active_nodes); + WARN_ON_ONCE(nlru->nr_items < 0); + spin_unlock(&nlru->lock); + return true; + } + spin_unlock(&nlru->lock); + return false; +} +EXPORT_SYMBOL_GPL(list_lru_del); + +unsigned long +list_lru_count_node(struct list_lru *lru, int nid) +{ + unsigned long count = 0; + struct list_lru_node *nlru = &lru->node[nid]; + + spin_lock(&nlru->lock); + WARN_ON_ONCE(nlru->nr_items < 0); + count += nlru->nr_items; + spin_unlock(&nlru->lock); + + return count; +} +EXPORT_SYMBOL_GPL(list_lru_count_node); + +unsigned long +list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, + void *cb_arg, unsigned long *nr_to_walk) +{ + + struct list_lru_node *nlru = &lru->node[nid]; + struct list_head *item, *n; + unsigned long isolated = 0; + + spin_lock(&nlru->lock); +restart: + list_for_each_safe(item, n, &nlru->list) { + enum lru_status ret; + + /* + * decrement nr_to_walk first so that we don't livelock if we + * get stuck on large numbesr of LRU_RETRY items + */ + if 
(--(*nr_to_walk) == 0) + break; + + ret = isolate(item, &nlru->lock, cb_arg); + switch (ret) { + case LRU_REMOVED: + if (--nlru->nr_items == 0) + node_clear(nid, lru->active_nodes); + WARN_ON_ONCE(nlru->nr_items < 0); + isolated++; + break; + case LRU_ROTATE: + list_move_tail(item, &nlru->list); + break; + case LRU_SKIP: + break; + case LRU_RETRY: + /* + * The lru lock has been dropped, our list traversal is + * now invalid and so we have to restart from scratch. + */ + goto restart; + default: + BUG(); + } + } + + spin_unlock(&nlru->lock); + return isolated; +} +EXPORT_SYMBOL_GPL(list_lru_walk_node); + +int list_lru_init(struct list_lru *lru) +{ + int i; + size_t size = sizeof(*lru->node) * nr_node_ids; + + lru->node = kzalloc(size, GFP_KERNEL); + if (!lru->node) + return -ENOMEM; + + nodes_clear(lru->active_nodes); + for (i = 0; i < nr_node_ids; i++) { + spin_lock_init(&lru->node[i].lock); + INIT_LIST_HEAD(&lru->node[i].list); + lru->node[i].nr_items = 0; + } + return 0; +} +EXPORT_SYMBOL_GPL(list_lru_init); + +void list_lru_destroy(struct list_lru *lru) +{ + kfree(lru->node); +} +EXPORT_SYMBOL_GPL(list_lru_destroy); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c6bd28edd533..d5ff3ce13029 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -39,7 +39,6 @@ #include <linux/limits.h> #include <linux/export.h> #include <linux/mutex.h> -#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -85,26 +84,12 @@ static int really_do_swap_account __initdata = 0; #endif -/* - * Statistics for memory cgroup. - */ -enum mem_cgroup_stat_index { - /* - * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. 
- */ - MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ - MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ - MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ - MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ - MEM_CGROUP_STAT_NSTATS, -}; - static const char * const mem_cgroup_stat_names[] = { "cache", "rss", "rss_huge", "mapped_file", + "writeback", "swap", }; @@ -175,10 +160,6 @@ struct mem_cgroup_per_zone { struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; - struct rb_node tree_node; /* RB tree node */ - unsigned long long usage_in_excess;/* Set to the value by which */ - /* the soft limit is exceeded*/ - bool on_tree; struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; @@ -187,26 +168,6 @@ struct mem_cgroup_per_node { struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; }; -/* - * Cgroups above their limits are maintained in a RB-Tree, independent of - * their hierarchy representation - */ - -struct mem_cgroup_tree_per_zone { - struct rb_root rb_root; - spinlock_t lock; -}; - -struct mem_cgroup_tree_per_node { - struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; -}; - -struct mem_cgroup_tree { - struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; -}; - -static struct mem_cgroup_tree soft_limit_tree __read_mostly; - struct mem_cgroup_threshold { struct eventfd_ctx *eventfd; u64 threshold; @@ -280,6 +241,7 @@ struct mem_cgroup { bool oom_lock; atomic_t under_oom; + atomic_t oom_wakeups; int swappiness; /* OOM-Killer disable */ @@ -304,7 +266,7 @@ struct mem_cgroup { * Should we move charges of a task when a task is moved into this * mem_cgroup ? And what type of charges should we move ? */ - unsigned long move_charge_at_immigrate; + unsigned long move_charge_at_immigrate; /* * set > 0 if pages under this cgroup are moving to other cgroup. 
*/ @@ -341,6 +303,22 @@ struct mem_cgroup { atomic_t numainfo_events; atomic_t numainfo_updating; #endif + /* + * Protects soft_contributed transitions. + * See mem_cgroup_update_soft_limit + */ + spinlock_t soft_lock; + + /* + * If true then this group has increased parents' children_in_excess + * when it got over the soft limit. + * When a group falls below the soft limit, parents' children_in_excess + * is decreased and soft_contributed changed to false. + */ + bool soft_contributed; + + /* Number of children that are in soft limit excess */ + atomic_t children_in_excess; struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ @@ -444,7 +422,6 @@ static bool move_file(void) * limit reclaim to prevent infinite loops, if they ever occur. */ #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 -#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, @@ -671,164 +648,6 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) return mem_cgroup_zoneinfo(memcg, nid, zid); } -static struct mem_cgroup_tree_per_zone * -soft_limit_tree_node_zone(int nid, int zid) -{ - return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; -} - -static struct mem_cgroup_tree_per_zone * -soft_limit_tree_from_page(struct page *page) -{ - int nid = page_to_nid(page); - int zid = page_zonenum(page); - - return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; -} - -static void -__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz, - unsigned long long new_usage_in_excess) -{ - struct rb_node **p = &mctz->rb_root.rb_node; - struct rb_node *parent = NULL; - struct mem_cgroup_per_zone *mz_node; - - if (mz->on_tree) - return; - - mz->usage_in_excess = new_usage_in_excess; - if (!mz->usage_in_excess) - return; - while (*p) { - parent = *p; - mz_node = rb_entry(parent, struct mem_cgroup_per_zone, - tree_node); -
if (mz->usage_in_excess < mz_node->usage_in_excess) - p = &(*p)->rb_left; - /* - * We can't avoid mem cgroups that are over their soft - * limit by the same amount - */ - else if (mz->usage_in_excess >= mz_node->usage_in_excess) - p = &(*p)->rb_right; - } - rb_link_node(&mz->tree_node, parent, p); - rb_insert_color(&mz->tree_node, &mctz->rb_root); - mz->on_tree = true; -} - -static void -__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) -{ - if (!mz->on_tree) - return; - rb_erase(&mz->tree_node, &mctz->rb_root); - mz->on_tree = false; -} - -static void -mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, - struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) -{ - spin_lock(&mctz->lock); - __mem_cgroup_remove_exceeded(memcg, mz, mctz); - spin_unlock(&mctz->lock); -} - - -static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) -{ - unsigned long long excess; - struct mem_cgroup_per_zone *mz; - struct mem_cgroup_tree_per_zone *mctz; - int nid = page_to_nid(page); - int zid = page_zonenum(page); - mctz = soft_limit_tree_from_page(page); - - /* - * Necessary to update all ancestors when hierarchy is used. - * because their event counter is not touched. - */ - for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_zoneinfo(memcg, nid, zid); - excess = res_counter_soft_limit_excess(&memcg->res); - /* - * We have to update the tree if mz is on RB-tree or - * mem is over its softlimit. - */ - if (excess || mz->on_tree) { - spin_lock(&mctz->lock); - /* if on-tree, remove it */ - if (mz->on_tree) - __mem_cgroup_remove_exceeded(memcg, mz, mctz); - /* - * Insert again. mz->usage_in_excess will be updated. - * If excess is 0, no tree ops. 
- */ - __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); - spin_unlock(&mctz->lock); - } - } -} - -static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) -{ - int node, zone; - struct mem_cgroup_per_zone *mz; - struct mem_cgroup_tree_per_zone *mctz; - - for_each_node(node) { - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - mz = mem_cgroup_zoneinfo(memcg, node, zone); - mctz = soft_limit_tree_node_zone(node, zone); - mem_cgroup_remove_exceeded(memcg, mz, mctz); - } - } -} - -static struct mem_cgroup_per_zone * -__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) -{ - struct rb_node *rightmost = NULL; - struct mem_cgroup_per_zone *mz; - -retry: - mz = NULL; - rightmost = rb_last(&mctz->rb_root); - if (!rightmost) - goto done; /* Nothing to reclaim from */ - - mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); - /* - * Remove the node now but someone else can add it back, - * we will to add it back at the end of reclaim to its correct - * position in the tree. - */ - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); - if (!res_counter_soft_limit_excess(&mz->memcg->res) || - !css_tryget(&mz->memcg->css)) - goto retry; -done: - return mz; -} - -static struct mem_cgroup_per_zone * -mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) -{ - struct mem_cgroup_per_zone *mz; - - spin_lock(&mctz->lock); - mz = __mem_cgroup_largest_soft_limit_node(mctz); - spin_unlock(&mctz->lock); - return mz; -} - /* * Implementation Note: reading percpu statistics for memcg. * @@ -1003,6 +822,48 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, } /* + * Called from rate-limited memcg_check_events when enough + * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure + * that all the parents up the hierarchy will be notified that this group + * is in excess or that it is not in excess anymore. 
memcg->soft_contributed + * makes the transition a single action whenever the state flips from one to + * the other. + */ +static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg) +{ + unsigned long long excess = res_counter_soft_limit_excess(&memcg->res); + struct mem_cgroup *parent = memcg; + int delta = 0; + + spin_lock(&memcg->soft_lock); + if (excess) { + if (!memcg->soft_contributed) { + delta = 1; + memcg->soft_contributed = true; + } + } else { + if (memcg->soft_contributed) { + delta = -1; + memcg->soft_contributed = false; + } + } + + /* + * Necessary to update all ancestors when hierarchy is used + * because their event counter is not touched. + * We track children even outside the hierarchy for the root + * cgroup because tree walk starting at root should visit + * all cgroups and we want to prevent a pointless tree + * walk if no child is over the limit. + */ + while (delta && (parent = parent_mem_cgroup(parent))) + atomic_add(delta, &parent->children_in_excess); + if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) + atomic_add(delta, &root_mem_cgroup->children_in_excess); + spin_unlock(&memcg->soft_lock); +} + +/* * Check events in order. * */ @@ -1025,7 +886,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page); + mem_cgroup_update_soft_limit(memcg); #if MAX_NUMNODES > 1 if (unlikely(do_numainfo)) atomic_inc(&memcg->numainfo_events); @@ -1068,6 +929,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return memcg; } +static enum mem_cgroup_filter_t +mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root, + mem_cgroup_iter_filter cond) +{ + if (!cond) + return VISIT; + return cond(memcg, root); +} + /* * Returns a next (in a pre-order walk) alive memcg (with elevated css * ref. count) or NULL if the whole root's subtree has been visited.
@@ -1075,7 +945,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) * helper function to be used by mem_cgroup_iter */ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, - struct mem_cgroup *last_visited) + struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond) { struct cgroup_subsys_state *prev_css, *next_css; @@ -1093,11 +963,31 @@ skip_node: if (next_css) { struct mem_cgroup *mem = mem_cgroup_from_css(next_css); - if (css_tryget(&mem->css)) - return mem; - else { + switch (mem_cgroup_filter(mem, root, cond)) { + case SKIP: prev_css = next_css; goto skip_node; + case SKIP_TREE: + if (mem == root) + return NULL; + /* + * css_rightmost_descendant is not an optimal way to + * skip through a subtree (especially for imbalanced + * trees leaning to right) but that's what we have right + * now. More effective solution would be traversing + * right-up for first non-NULL without calling + * css_next_descendant_pre afterwards. + */ + prev_css = css_rightmost_descendant(next_css); + goto skip_node; + case VISIT: + if (css_tryget(&mem->css)) + return mem; + else { + prev_css = next_css; + goto skip_node; + } + break; } } @@ -1161,6 +1051,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, * @root: hierarchy root * @prev: previously returned memcg, NULL on first invocation * @reclaim: cookie for shared reclaim walks, NULL for full walks + * @cond: filter for visited nodes, NULL for no filter * * Returns references to children of the hierarchy below @root, or * @root itself, or %NULL after a full round-trip. @@ -1173,15 +1064,18 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, * divide up the memcgs in the hierarchy among all concurrent * reclaimers operating on the same zone and priority. 
*/ -struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, +struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, struct mem_cgroup *prev, - struct mem_cgroup_reclaim_cookie *reclaim) + struct mem_cgroup_reclaim_cookie *reclaim, + mem_cgroup_iter_filter cond) { struct mem_cgroup *memcg = NULL; struct mem_cgroup *last_visited = NULL; - if (mem_cgroup_disabled()) - return NULL; + if (mem_cgroup_disabled()) { + /* first call must return non-NULL, second return NULL */ + return (struct mem_cgroup *)(unsigned long)!prev; + } if (!root) root = root_mem_cgroup; @@ -1192,7 +1086,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, if (!root->use_hierarchy && root != root_mem_cgroup) { if (prev) goto out_css_put; - return root; + if (mem_cgroup_filter(root, root, cond) == VISIT) + return root; + return NULL; } rcu_read_lock(); @@ -1215,7 +1111,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, last_visited = mem_cgroup_iter_load(iter, root, &seq); } - memcg = __mem_cgroup_iter_next(root, last_visited); + memcg = __mem_cgroup_iter_next(root, last_visited, cond); if (reclaim) { mem_cgroup_iter_update(iter, last_visited, memcg, seq); @@ -1226,7 +1122,11 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, reclaim->generation = iter->generation; } - if (prev && !memcg) + /* + * We have finished the whole tree walk or no group has been + * visited because filter told us to skip the root node. 
+ */ + if (!memcg && (prev || (cond && !last_visited))) goto out_unlock; } out_unlock: @@ -1867,6 +1767,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, return total; } +#if MAX_NUMNODES > 1 /** * test_mem_cgroup_node_reclaimable * @memcg: the target memcg @@ -1889,7 +1790,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg, return false; } -#if MAX_NUMNODES > 1 /* * Always updating the nodemask is not very good - even if we have an empty @@ -1957,115 +1857,64 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) return node; } -/* - * Check all nodes whether it contains reclaimable pages or not. - * For quick scan, we make use of scan_nodes. This will allow us to skip - * unused nodes. But scan_nodes is lazily updated and may not cotain - * enough new information. We need to do double check. - */ -static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) -{ - int nid; - - /* - * quick check...making use of scan_node. - * We can skip unused nodes. - */ - if (!nodes_empty(memcg->scan_nodes)) { - for (nid = first_node(memcg->scan_nodes); - nid < MAX_NUMNODES; - nid = next_node(nid, memcg->scan_nodes)) { - - if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) - return true; - } - } - /* - * Check rest of nodes. 
- */ - for_each_node_state(nid, N_MEMORY) { - if (node_isset(nid, memcg->scan_nodes)) - continue; - if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) - return true; - } - return false; -} - #else int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) { return 0; } -static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) -{ - return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); -} #endif -static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, - struct zone *zone, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - struct mem_cgroup *victim = NULL; - int total = 0; - int loop = 0; - unsigned long excess; - unsigned long nr_scanned; - struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, - .priority = 0, - }; - - excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; - - while (1) { - victim = mem_cgroup_iter(root_memcg, victim, &reclaim); - if (!victim) { - loop++; - if (loop >= 2) { - /* - * If we have not been able to reclaim - * anything, it might because there are - * no reclaimable pages under this hierarchy - */ - if (!total) - break; - /* - * We want to do more targeted reclaim. - * excess >> 2 is not to excessive so as to - * reclaim too much, nor too less that we keep - * coming back to reclaim from this cgroup - */ - if (total >= (excess >> 2) || - (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) - break; - } - continue; - } - if (!mem_cgroup_reclaimable(victim, false)) - continue; - total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, - zone, &nr_scanned); - *total_scanned += nr_scanned; - if (!res_counter_soft_limit_excess(&root_memcg->res)) +/* + * A group is eligible for the soft limit reclaim under the given root + * hierarchy if + * a) it is over its soft limit + * b) any parent up the hierarchy is over its soft limit + * + * If the given group doesn't have any children over the limit then it + * doesn't make any sense to iterate its subtree. 
+ */ +enum mem_cgroup_filter_t +mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, + struct mem_cgroup *root) +{ + struct mem_cgroup *parent; + + if (!memcg) + memcg = root_mem_cgroup; + parent = memcg; + + if (res_counter_soft_limit_excess(&memcg->res)) + return VISIT; + + /* + * If any parent up to the root in the hierarchy is over its soft limit + * then we have to obey and reclaim from this group as well. + */ + while ((parent = parent_mem_cgroup(parent))) { + if (res_counter_soft_limit_excess(&parent->res)) + return VISIT; + if (parent == root) break; } - mem_cgroup_iter_break(root_memcg, victim); - return total; + + if (!atomic_read(&memcg->children_in_excess)) + return SKIP_TREE; + return SKIP; } +static DEFINE_SPINLOCK(memcg_oom_lock); + /* * Check OOM-Killer is already running under our hierarchy. * If someone is running, return false. - * Has to be called with memcg_oom_lock */ -static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) +static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) { struct mem_cgroup *iter, *failed = NULL; + spin_lock(&memcg_oom_lock); + for_each_mem_cgroup_tree(iter, memcg) { if (iter->oom_lock) { /* @@ -2079,33 +1928,33 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) iter->oom_lock = true; } - if (!failed) - return true; - - /* - * OK, we failed to lock the whole subtree so we have to clean up - * what we set up to the failing subtree - */ - for_each_mem_cgroup_tree(iter, memcg) { - if (iter == failed) { - mem_cgroup_iter_break(memcg, iter); - break; + if (failed) { + /* + * OK, we failed to lock the whole subtree so we have + * to clean up what we set up to the failing subtree + */ + for_each_mem_cgroup_tree(iter, memcg) { + if (iter == failed) { + mem_cgroup_iter_break(memcg, iter); + break; + } + iter->oom_lock = false; } - iter->oom_lock = false; } - return false; + + spin_unlock(&memcg_oom_lock); + + return !failed; } -/* - * Has to be called with memcg_oom_lock - */ -static int 
mem_cgroup_oom_unlock(struct mem_cgroup *memcg) +static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg) { struct mem_cgroup *iter; + spin_lock(&memcg_oom_lock); for_each_mem_cgroup_tree(iter, memcg) iter->oom_lock = false; - return 0; + spin_unlock(&memcg_oom_lock); } static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg) @@ -2129,7 +1978,6 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) atomic_add_unless(&iter->under_oom, -1, 0); } -static DEFINE_SPINLOCK(memcg_oom_lock); static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); struct oom_wait_info { @@ -2159,6 +2007,7 @@ static int memcg_oom_wake_function(wait_queue_t *wait, static void memcg_wakeup_oom(struct mem_cgroup *memcg) { + atomic_inc(&memcg->oom_wakeups); /* for filtering, pass "memcg" as argument. */ __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg); } @@ -2170,56 +2019,136 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) } /* - * try to call OOM killer. returns false if we should exit memory-reclaim loop. + * try to call OOM killer */ -static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, - int order) +static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - struct oom_wait_info owait; - bool locked, need_to_kill; + bool locked; + int wakeups; - owait.memcg = memcg; - owait.wait.flags = 0; - owait.wait.func = memcg_oom_wake_function; - owait.wait.private = current; - INIT_LIST_HEAD(&owait.wait.task_list); - need_to_kill = true; - mem_cgroup_mark_under_oom(memcg); + if (!current->memcg_oom.may_oom) + return; + + current->memcg_oom.in_memcg_oom = 1; - /* At first, try to OOM lock hierarchy under memcg.*/ - spin_lock(&memcg_oom_lock); - locked = mem_cgroup_oom_lock(memcg); /* - * Even if signal_pending(), we can't quit charge() loop without - * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL - * under OOM is always welcomed, use TASK_KILLABLE here. 
+ * As with any blocking lock, a contender needs to start + * listening for wakeups before attempting the trylock, + * otherwise it can miss the wakeup from the unlock and sleep + * indefinitely. This is just open-coded because our locking + * is so particular to memcg hierarchies. */ - prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); - if (!locked || memcg->oom_kill_disable) - need_to_kill = false; + wakeups = atomic_read(&memcg->oom_wakeups); + mem_cgroup_mark_under_oom(memcg); + + locked = mem_cgroup_oom_trylock(memcg); + if (locked) mem_cgroup_oom_notify(memcg); - spin_unlock(&memcg_oom_lock); - if (need_to_kill) { - finish_wait(&memcg_oom_waitq, &owait.wait); + if (locked && !memcg->oom_kill_disable) { + mem_cgroup_unmark_under_oom(memcg); mem_cgroup_out_of_memory(memcg, mask, order); + mem_cgroup_oom_unlock(memcg); + /* + * There is no guarantee that an OOM-lock contender + * sees the wakeups triggered by the OOM kill + * uncharges. Wake any sleepers explicitly. + */ + memcg_oom_recover(memcg); } else { - schedule(); - finish_wait(&memcg_oom_waitq, &owait.wait); + /* + * A system call can just return -ENOMEM, but if this + * is a page fault and somebody else is handling the + * OOM already, we need to sleep on the OOM waitqueue + * for this memcg until the situation is resolved. + * Which can take some time because it might be + * handled by a userspace task. + * + * However, this is the charge context, which means + * that we may sit on a large call stack and hold + * various filesystem locks, the mmap_sem etc. and we + * don't want the OOM handler to deadlock on them + * while we sit here and wait. Store the current OOM + * context in the task_struct, then return -ENOMEM. + * At the end of the page fault handler, with the + * stack unwound, pagefault_out_of_memory() will check + * back with us by calling + * mem_cgroup_oom_synchronize(), possibly putting the + * task to sleep.
+ */ + current->memcg_oom.oom_locked = locked; + current->memcg_oom.wakeups = wakeups; + css_get(&memcg->css); + current->memcg_oom.wait_on_memcg = memcg; } - spin_lock(&memcg_oom_lock); - if (locked) - mem_cgroup_oom_unlock(memcg); - memcg_wakeup_oom(memcg); - spin_unlock(&memcg_oom_lock); +} - mem_cgroup_unmark_under_oom(memcg); +/** + * mem_cgroup_oom_synchronize - complete memcg OOM handling + * + * This has to be called at the end of a page fault if the memcg + * OOM handler was enabled and the fault is returning %VM_FAULT_OOM. + * + * Memcg supports userspace OOM handling, so failed allocations must + * sleep on a waitqueue until the userspace task resolves the + * situation. Sleeping directly in the charge context with all kinds + * of locks held is not a good idea, instead we remember an OOM state + * in the task and mem_cgroup_oom_synchronize() has to be called at + * the end of the page fault to put the task to sleep and clean up the + * OOM state. + * + * Returns %true if an ongoing memcg OOM situation was detected and + * finalized, %false otherwise. + */ +bool mem_cgroup_oom_synchronize(void) +{ + struct oom_wait_info owait; + struct mem_cgroup *memcg; - if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) + /* OOM is global, do not handle */ + if (!current->memcg_oom.in_memcg_oom) return false; - /* Give chance to dying process */ - schedule_timeout_uninterruptible(1); + + /* + * We invoked the OOM killer but there is a chance that a kill + * did not free up any charges. Everybody else might already + * be sleeping, so restart the fault and keep the rampage + * going until some charges are released.
+ */ + memcg = current->memcg_oom.wait_on_memcg; + if (!memcg) + goto out; + + if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) + goto out_memcg; + + owait.memcg = memcg; + owait.wait.flags = 0; + owait.wait.func = memcg_oom_wake_function; + owait.wait.private = current; + INIT_LIST_HEAD(&owait.wait.task_list); + + prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); + /* Only sleep if we didn't miss any wakeups since OOM */ + if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups) + schedule(); + finish_wait(&memcg_oom_waitq, &owait.wait); +out_memcg: + mem_cgroup_unmark_under_oom(memcg); + if (current->memcg_oom.oom_locked) { + mem_cgroup_oom_unlock(memcg); + /* + * There is no guarantee that an OOM-lock contender + * sees the wakeups triggered by the OOM kill + * uncharges. Wake any sleepers explicitly. + */ + memcg_oom_recover(memcg); + } + css_put(&memcg->css); + current->memcg_oom.wait_on_memcg = NULL; +out: + current->memcg_oom.in_memcg_oom = 0; return true; } @@ -2288,7 +2217,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags) } void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_page_stat_item idx, int val) + enum mem_cgroup_stat_index idx, int val) { struct mem_cgroup *memcg; struct page_cgroup *pc = lookup_page_cgroup(page); @@ -2297,18 +2226,11 @@ void mem_cgroup_update_page_stat(struct page *page, if (mem_cgroup_disabled()) return; + VM_BUG_ON(!rcu_read_lock_held()); memcg = pc->mem_cgroup; if (unlikely(!memcg || !PageCgroupUsed(pc))) return; - switch (idx) { - case MEMCG_NR_FILE_MAPPED: - idx = MEM_CGROUP_STAT_FILE_MAPPED; - break; - default: - BUG(); - } - this_cpu_add(memcg->stat->count[idx], val); } @@ -2450,7 +2372,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync) flush_work(&stock->work); } out: - put_online_cpus(); + put_online_cpus(); } /* @@ -2532,12 +2454,11 @@ enum { CHARGE_RETRY, /* need to retry but retry is not bad */
CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */ - CHARGE_OOM_DIE, /* the current is killed because of OOM */ }; static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, unsigned int nr_pages, unsigned int min_pages, - bool oom_check) + bool invoke_oom) { unsigned long csize = nr_pages * PAGE_SIZE; struct mem_cgroup *mem_over_limit; @@ -2594,14 +2515,10 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (mem_cgroup_wait_acct_move(mem_over_limit)) return CHARGE_RETRY; - /* If we don't need to call oom-killer at el, return immediately */ - if (!oom_check) - return CHARGE_NOMEM; - /* check OOM */ - if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize))) - return CHARGE_OOM_DIE; + if (invoke_oom) + mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize)); - return CHARGE_RETRY; + return CHARGE_NOMEM; } /* @@ -2704,7 +2621,7 @@ again: } do { - bool oom_check; + bool invoke_oom = oom && !nr_oom_retries; /* If killed, bypass charge */ if (fatal_signal_pending(current)) { @@ -2712,14 +2629,8 @@ again: goto bypass; } - oom_check = false; - if (oom && !nr_oom_retries) { - oom_check = true; - nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; - } - - ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, nr_pages, - oom_check); + ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, + nr_pages, invoke_oom); switch (ret) { case CHARGE_OK: break; @@ -2732,16 +2643,12 @@ again: css_put(&memcg->css); goto nomem; case CHARGE_NOMEM: /* OOM routine works */ - if (!oom) { + if (!oom || invoke_oom) { css_put(&memcg->css); goto nomem; } - /* If oom, we never return -ENOMEM */ nr_oom_retries--; break; - case CHARGE_OOM_DIE: /* Killed by OOM Killer */ - css_put(&memcg->css); - goto bypass; } } while (ret != CHARGE_OK); @@ -2882,7 +2789,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, * is accessed after testing USED bit. 
To make pc->mem_cgroup visible * before USED bit, we need memory barrier here. * See mem_cgroup_add_lru_list(), etc. - */ + */ smp_wmb(); SetPageCgroupUsed(pc); @@ -2905,9 +2812,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, unlock_page_cgroup(pc); /* - * "charge_statistics" updated event counter. Then, check it. - * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. - * if they exceeds softlimit. + * "charge_statistics" updated event counter. */ memcg_check_events(memcg, page); } @@ -3626,9 +3531,9 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) * the page allocator. Therefore, the following sequence when backed by * the SLUB allocator: * - * memcg_stop_kmem_account(); - * kmalloc(<large_number>) - * memcg_resume_kmem_account(); + * memcg_stop_kmem_account(); + * kmalloc(<large_number>) + * memcg_resume_kmem_account(); * * would effectively ignore the fact that we should skip accounting, * since it will drive us directly to this function without passing @@ -3750,6 +3655,20 @@ void mem_cgroup_split_huge_fixup(struct page *head) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline +void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, + struct mem_cgroup *to, + unsigned int nr_pages, + enum mem_cgroup_stat_index idx) +{ + /* Update stat data for mem_cgroup */ + preempt_disable(); + WARN_ON_ONCE(from->stat->count[idx] < nr_pages); + __this_cpu_add(from->stat->count[idx], -nr_pages); + __this_cpu_add(to->stat->count[idx], nr_pages); + preempt_enable(); +} + /** * mem_cgroup_move_account - move account of the page * @page: the page @@ -3795,13 +3714,14 @@ static int mem_cgroup_move_account(struct page *page, move_lock_mem_cgroup(from, &flags); - if (!anon && page_mapped(page)) { - /* Update mapped_file data for mem_cgroup */ - preempt_disable(); - __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); - 
preempt_enable(); - } + if (!anon && page_mapped(page)) + mem_cgroup_move_account_page_stat(from, to, nr_pages, + MEM_CGROUP_STAT_FILE_MAPPED); + + if (PageWriteback(page)) + mem_cgroup_move_account_page_stat(from, to, nr_pages, + MEM_CGROUP_STAT_WRITEBACK); + mem_cgroup_charge_statistics(from, page, anon, -nr_pages); /* caller should have done css_get */ @@ -4657,7 +4577,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ - if (curusage >= oldusage) + if (curusage >= oldusage) retry_count--; else oldusage = curusage; @@ -4678,7 +4598,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, int enlarge = 0; /* see mem_cgroup_resize_res_limit */ - retry_count = children * MEM_CGROUP_RECLAIM_RETRIES; + retry_count = children * MEM_CGROUP_RECLAIM_RETRIES; oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); while (retry_count) { if (signal_pending(current)) { @@ -4727,98 +4647,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - unsigned long nr_reclaimed = 0; - struct mem_cgroup_per_zone *mz, *next_mz = NULL; - unsigned long reclaimed; - int loop = 0; - struct mem_cgroup_tree_per_zone *mctz; - unsigned long long excess; - unsigned long nr_scanned; - - if (order > 0) - return 0; - - mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); - /* - * This loop can run a while, specially if mem_cgroup's continuously - * keep exceeding their soft limit and putting the system under - * pressure - */ - do { - if (next_mz) - mz = next_mz; - else - mz = mem_cgroup_largest_soft_limit_node(mctz); - if (!mz) - break; - - nr_scanned = 0; - reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, - gfp_mask, &nr_scanned); - nr_reclaimed += reclaimed; - *total_scanned += 
nr_scanned; - spin_lock(&mctz->lock); - - /* - * If we failed to reclaim anything from this memory cgroup - * it is time to move on to the next cgroup - */ - next_mz = NULL; - if (!reclaimed) { - do { - /* - * Loop until we find yet another one. - * - * By the time we get the soft_limit lock - * again, someone might have aded the - * group back on the RB tree. Iterate to - * make sure we get a different mem. - * mem_cgroup_largest_soft_limit_node returns - * NULL if no other cgroup is present on - * the tree - */ - next_mz = - __mem_cgroup_largest_soft_limit_node(mctz); - if (next_mz == mz) - css_put(&next_mz->memcg->css); - else /* next_mz == NULL or other memcg */ - break; - } while (1); - } - __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); - excess = res_counter_soft_limit_excess(&mz->memcg->res); - /* - * One school of thought says that we should not add - * back the node to the tree if reclaim returns 0. - * But our reclaim could return 0, simply because due - * to priority we are exposing a smaller subset of - * memory to reclaim from. Consider this as a longer - * term TODO. - */ - /* If excess == 0, no tree ops */ - __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); - spin_unlock(&mctz->lock); - css_put(&mz->memcg->css); - loop++; - /* - * Could not reclaim anything and there are no more - * mem cgroups to try or we seem to be looping without - * reclaiming anything. 
- */ - if (!nr_reclaimed && - (next_mz == NULL || - loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) - break; - } while (!nr_reclaimed); - if (next_mz) - css_put(&next_mz->memcg->css); - return nr_reclaimed; -} - /** * mem_cgroup_force_empty_list - clears LRU of a group * @memcg: group to clear @@ -4990,18 +4818,12 @@ static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css, unsigned int event) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - int ret; if (mem_cgroup_is_root(memcg)) return -EINVAL; - css_get(&memcg->css); - ret = mem_cgroup_force_empty(memcg); - css_put(&memcg->css); - - return ret; + return mem_cgroup_force_empty(memcg); } - static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -5139,7 +4961,7 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) */ mutex_lock(&memcg_create_mutex); mutex_lock(&set_limit_mutex); - if (!memcg->kmem_account_flags && val != RESOURCE_MAX) { + if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) { if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) { ret = -EBUSY; goto out; @@ -5149,7 +4971,7 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val) ret = memcg_update_cache_sizes(memcg); if (ret) { - res_counter_set_limit(&memcg->kmem, RESOURCE_MAX); + res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX); goto out; } static_key_slow_inc(&memcg_kmem_enabled_key); @@ -6089,8 +5911,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) for (zone = 0; zone < MAX_NR_ZONES; zone++) { mz = &pn->zoneinfo[zone]; lruvec_init(&mz->lruvec); - mz->usage_in_excess = 0; - mz->on_tree = false; mz->memcg = memcg; } memcg->nodeinfo[node] = pn; @@ -6146,7 +5966,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) int node; size_t size = memcg_size(); - mem_cgroup_remove_from_trees(memcg); free_css_id(&mem_cgroup_subsys, &memcg->css); for_each_node(node) @@ -6183,29 +6002,6 @@ 
struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); -static void __init mem_cgroup_soft_limit_tree_init(void) -{ - struct mem_cgroup_tree_per_node *rtpn; - struct mem_cgroup_tree_per_zone *rtpz; - int tmp, node, zone; - - for_each_node(node) { - tmp = node; - if (!node_state(node, N_NORMAL_MEMORY)) - tmp = -1; - rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); - BUG_ON(!rtpn); - - soft_limit_tree.rb_tree_per_node[node] = rtpn; - - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - rtpz = &rtpn->rb_tree_per_zone[zone]; - rtpz->rb_root = RB_ROOT; - spin_lock_init(&rtpz->lock); - } - } -} - static struct cgroup_subsys_state * __ref mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { @@ -6235,6 +6031,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) mutex_init(&memcg->thresholds_lock); spin_lock_init(&memcg->move_lock); vmpressure_init(&memcg->vmpressure); + spin_lock_init(&memcg->soft_lock); return &memcg->css; @@ -6312,6 +6109,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); + if (memcg->soft_contributed) { + while ((memcg = parent_mem_cgroup(memcg))) + atomic_dec(&memcg->children_in_excess); + + if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) + atomic_dec(&root_mem_cgroup->children_in_excess); + } mem_cgroup_destroy_all_caches(memcg); vmpressure_cleanup(&memcg->vmpressure); } @@ -6986,7 +6790,6 @@ static int __init mem_cgroup_init(void) { hotcpu_notifier(memcg_cpu_hotplug_callback, 0); enable_swap_cgroup(); - mem_cgroup_soft_limit_tree_init(); memcg_stock_init(); return 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d472e14c6808..947ed5413279 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) */ if (access) { int nr; + int nid = page_to_nid(p); do { struct shrink_control shrink 
= { .gfp_mask = GFP_KERNEL, }; + node_set(nid, shrink.nodes_to_scan); nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) diff --git a/mm/memory.c b/mm/memory.c index 2b73dbde2274..ca0003947115 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3695,7 +3695,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -int handle_pte_fault(struct mm_struct *mm, +static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags) { @@ -3754,22 +3754,14 @@ unlock: /* * By the time we get here, we already hold the mm semaphore */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, unsigned int flags) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - __set_current_state(TASK_RUNNING); - - count_vm_event(PGFAULT); - mem_cgroup_count_vm_event(mm, PGFAULT); - - /* do counter updates before entering really critical section. 
*/ - check_sync_rss_stat(current); - if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); @@ -3782,9 +3774,12 @@ retry: if (!pmd) return VM_FAULT_OOM; if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { + int ret = VM_FAULT_FALLBACK; if (!vma->vm_ops) - return do_huge_pmd_anonymous_page(mm, vma, address, - pmd, flags); + ret = do_huge_pmd_anonymous_page(mm, vma, address, + pmd, flags); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; } else { pmd_t orig_pmd = *pmd; int ret; @@ -3850,6 +3845,37 @@ retry: return handle_pte_fault(mm, vma, address, pte, pmd, flags); } +int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + int ret; + + __set_current_state(TASK_RUNNING); + + count_vm_event(PGFAULT); + mem_cgroup_count_vm_event(mm, PGFAULT); + + /* do counter updates before entering really critical section. */ + check_sync_rss_stat(current); + + /* + * Enable the memcg OOM handling for faults triggered in user + * space. Kernel faults are handled more gracefully. + */ + if (flags & FAULT_FLAG_USER) + mem_cgroup_enable_oom(); + + ret = __handle_mm_fault(mm, vma, address, flags); + + if (flags & FAULT_FLAG_USER) + mem_cgroup_disable_oom(); + + if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))) + mem_cgroup_oom_synchronize(); + + return ret; +} + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0eb1a1df649d..ed85fe3870e2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -52,14 +52,10 @@ DEFINE_MUTEX(mem_hotplug_mutex); void lock_memory_hotplug(void) { mutex_lock(&mem_hotplug_mutex); - - /* for exclusive hibernation if CONFIG_HIBERNATION=y */ - lock_system_sleep(); } void unlock_memory_hotplug(void) { - unlock_system_sleep(); mutex_unlock(&mem_hotplug_mutex); } diff --git a/mm/migrate.c b/mm/migrate.c index b7ded7eafe3a..9c8d5f59d30b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -311,7 +311,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ -static int migrate_page_move_mapping(struct address_space *mapping, +int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, struct buffer_head *head, enum migrate_mode mode) { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 98e75f2ac7bc..314e9d274381 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -678,9 +678,12 @@ out: */ void pagefault_out_of_memory(void) { - struct zonelist *zonelist = node_zonelist(first_online_node, - GFP_KERNEL); + struct zonelist *zonelist; + if (mem_cgroup_oom_synchronize()) + return; + + zonelist = node_zonelist(first_online_node, GFP_KERNEL); if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { out_of_memory(NULL, 0, 0, NULL, false); clear_zonelist_oom(zonelist, GFP_KERNEL); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6c7b0187be8e..f5236f804aa6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2143,11 +2143,17 @@ EXPORT_SYMBOL(account_page_dirtied); /* * Helper function for set_page_writeback family. + * + * The caller must hold mem_cgroup_begin/end_update_page_stat() lock + * while calling this function. + * See test_set_page_writeback for example. 
+ * * NOTE: Unlike account_page_dirtied this does not rely on being atomic * wrt interrupts. */ void account_page_writeback(struct page *page) { + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); inc_zone_page_state(page, NR_WRITEBACK); } EXPORT_SYMBOL(account_page_writeback); @@ -2364,7 +2370,10 @@ int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); int ret; + bool locked; + unsigned long memcg_flags; + mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2385,9 +2394,11 @@ int test_clear_page_writeback(struct page *page) ret = TestClearPageWriteback(page); } if (ret) { + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); dec_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITTEN); } + mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); return ret; } @@ -2395,7 +2406,10 @@ int test_set_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); int ret; + bool locked; + unsigned long memcg_flags; + mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2422,6 +2436,7 @@ int test_set_page_writeback(struct page *page) } if (!ret) account_page_writeback(page); + mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags); return ret; } diff --git a/mm/page_io.c b/mm/page_io.c index ba05b64e5d8d..8c79a4764be0 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -266,7 +266,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); - kiocb.ki_left = PAGE_SIZE; kiocb.ki_nbytes = PAGE_SIZE; set_page_writeback(page); diff --git a/mm/rmap.c b/mm/rmap.c index 07748e68b729..fd3ee7a54a13 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1052,11 +1052,11 @@ void 
do_page_add_anon_rmap(struct page *page, { int first = atomic_inc_and_test(&page->_mapcount); if (first) { - if (!PageTransHuge(page)) - __inc_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + hpage_nr_pages(page)); } if (unlikely(PageKsm(page))) return; @@ -1085,10 +1085,10 @@ void page_add_new_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); SetPageSwapBacked(page); atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ - if (!PageTransHuge(page)) - __inc_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + hpage_nr_pages(page)); __page_set_anon_rmap(page, vma, address, 1); if (!mlocked_vma_newpage(vma, page)) { SetPageActive(page); @@ -1111,7 +1111,7 @@ void page_add_file_rmap(struct page *page) mem_cgroup_begin_update_page_stat(page, &locked, &flags); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED); + mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); } mem_cgroup_end_update_page_stat(page, &locked, &flags); } @@ -1148,14 +1148,14 @@ void page_remove_rmap(struct page *page) goto out; if (anon) { mem_cgroup_uncharge_page(page); - if (!PageTransHuge(page)) - __dec_zone_page_state(page, NR_ANON_PAGES); - else + if (PageTransHuge(page)) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, + -hpage_nr_pages(page)); } else { __dec_zone_page_state(page, NR_FILE_MAPPED); - mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); + mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); mem_cgroup_end_update_page_stat(page, &locked, &flags); } if 
(unlikely(PageMlocked(page))) diff --git a/mm/slab_common.c b/mm/slab_common.c index 538bade6df7d..a3443278ce3a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -19,6 +19,7 @@ #include <asm/tlbflush.h> #include <asm/page.h> #include <linux/memcontrol.h> +#include <trace/events/kmem.h> #include "slab.h" @@ -373,7 +374,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) { int index; - if (size > KMALLOC_MAX_SIZE) { + if (unlikely(size > KMALLOC_MAX_SIZE)) { WARN_ON_ONCE(!(flags & __GFP_NOWARN)); return NULL; } @@ -495,6 +496,15 @@ void __init create_kmalloc_caches(unsigned long flags) } #endif /* !CONFIG_SLOB */ +#ifdef CONFIG_TRACING +void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +{ + void *ret = kmalloc_order(size, flags, order); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_order_trace); +#endif #ifdef CONFIG_SLABINFO diff --git a/mm/slob.c b/mm/slob.c index 91bd3f2dd2f0..4bf8809dfcce 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -462,11 +462,11 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) return ret; } -void *__kmalloc_node(size_t size, gfp_t gfp, int node) +void *__kmalloc(size_t size, gfp_t gfp) { - return __do_kmalloc_node(size, gfp, node, _RET_IP_); + return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_); } -EXPORT_SYMBOL(__kmalloc_node); +EXPORT_SYMBOL(__kmalloc); #ifdef CONFIG_TRACING void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller) @@ -534,7 +534,7 @@ int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) return 0; } -void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) +void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) { void *b; @@ -560,7 +560,27 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); return b; } +EXPORT_SYMBOL(slob_alloc_node); + +void 
*kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +{ + return slob_alloc_node(cachep, flags, NUMA_NO_NODE); +} +EXPORT_SYMBOL(kmem_cache_alloc); + +#ifdef CONFIG_NUMA +void *__kmalloc_node(size_t size, gfp_t gfp, int node) +{ + return __do_kmalloc_node(size, gfp, node, _RET_IP_); +} +EXPORT_SYMBOL(__kmalloc_node); + +void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node) +{ + return slob_alloc_node(cachep, gfp, node); +} EXPORT_SYMBOL(kmem_cache_alloc_node); +#endif static void __kmem_cache_free(void *b, int size) { diff --git a/mm/slub.c b/mm/slub.c index 51df8272cfaf..c3eb3d3ca835 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -373,7 +373,8 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page #endif { slab_lock(page); - if (page->freelist == freelist_old && page->counters == counters_old) { + if (page->freelist == freelist_old && + page->counters == counters_old) { page->freelist = freelist_new; page->counters = counters_new; slab_unlock(page); @@ -411,7 +412,8 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, local_irq_save(flags); slab_lock(page); - if (page->freelist == freelist_old && page->counters == counters_old) { + if (page->freelist == freelist_old && + page->counters == counters_old) { page->freelist = freelist_new; page->counters = counters_new; slab_unlock(page); @@ -553,8 +555,9 @@ static void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", - page, page->objects, page->inuse, page->freelist, page->flags); + printk(KERN_ERR + "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", + page, page->objects, page->inuse, page->freelist, page->flags); } @@ -629,7 +632,8 @@ static void object_err(struct kmem_cache *s, struct page *page, print_trailer(s, page, object); } -static void slab_err(struct kmem_cache *s, struct page *page, 
const char *fmt, ...) +static void slab_err(struct kmem_cache *s, struct page *page, + const char *fmt, ...) { va_list args; char buf[100]; @@ -788,7 +792,8 @@ static int check_object(struct kmem_cache *s, struct page *page, } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { check_bytes_and_report(s, page, p, "Alignment padding", - endobject, POISON_INUSE, s->inuse - s->object_size); + endobject, POISON_INUSE, + s->inuse - s->object_size); } } @@ -873,7 +878,6 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) object_err(s, page, object, "Freechain corrupt"); set_freepointer(s, object, NULL); - break; } else { slab_err(s, page, "Freepointer corrupt"); page->freelist = NULL; @@ -918,7 +922,8 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, page->freelist); if (!alloc) - print_section("Object ", (void *)object, s->object_size); + print_section("Object ", (void *)object, + s->object_size); dump_stack(); } @@ -937,7 +942,8 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) return should_failslab(s->object_size, flags, s->flags); } -static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) +static inline void slab_post_alloc_hook(struct kmem_cache *s, + gfp_t flags, void *object) { flags &= gfp_allowed_mask; kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); @@ -1039,7 +1045,8 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, init_tracking(s, object); } -static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, +static noinline int alloc_debug_processing(struct kmem_cache *s, + struct page *page, void *object, unsigned long addr) { if (!check_slab(s, page)) @@ -1743,7 +1750,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) /* * Remove the cpu slab */ -static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist) +static void deactivate_slab(struct 
kmem_cache *s, struct page *page, + void *freelist) { enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; struct kmem_cache_node *n = get_node(s, page_to_nid(page)); @@ -1999,7 +2007,8 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) page->pobjects = pobjects; page->next = oldpage; - } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); + } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) + != oldpage); #endif } @@ -2169,8 +2178,8 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) } /* - * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist - * or deactivate the page. + * Check the page->freelist of a page and either transfer the freelist to the + * per cpu freelist or deactivate the page. * * The page is still frozen if the return value is not NULL. * @@ -2314,7 +2323,8 @@ new_slab: goto load_freelist; /* Only entered in the debug case */ - if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr)) + if (kmem_cache_debug(s) && + !alloc_debug_processing(s, page, freelist, addr)) goto new_slab; /* Slab failed checks. Next slab needed */ deactivate_slab(s, page, get_freepointer(s, freelist)); @@ -2372,7 +2382,7 @@ redo: object = c->freelist; page = c->page; - if (unlikely(!object || !page || !node_match(page, node))) + if (unlikely(!object || !node_match(page, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { @@ -2382,13 +2392,15 @@ redo: * The cmpxchg will only match if there was no additional * operation and if we are on the right processor. * - * The cmpxchg does the following atomically (without lock semantics!) + * The cmpxchg does the following atomically (without lock + * semantics!) * 1. Relocate first pointer to the current per cpu area. * 2. Verify that tid and freelist have not been changed * 3. 
If they were not changed replace tid and freelist * - * Since this is without lock semantics the protection is only against - * code executing on this cpu *not* from access by other cpus. + * Since this is without lock semantics the protection is only + * against code executing on this cpu *not* from access by + * other cpus. */ if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, @@ -2420,7 +2432,8 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { void *ret = slab_alloc(s, gfpflags, _RET_IP_); - trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags); + trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, + s->size, gfpflags); return ret; } @@ -2434,14 +2447,6 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) return ret; } EXPORT_SYMBOL(kmem_cache_alloc_trace); - -void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) -{ - void *ret = kmalloc_order(size, flags, order); - trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); - return ret; -} -EXPORT_SYMBOL(kmalloc_order_trace); #endif #ifdef CONFIG_NUMA @@ -2512,8 +2517,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page, if (kmem_cache_has_cpu_partial(s) && !prior) /* - * Slab was on no list before and will be partially empty - * We can defer the list move and instead freeze it. + * Slab was on no list before and will be + * partially empty + * We can defer the list move and instead + * freeze it. */ new.frozen = 1; @@ -3071,8 +3078,8 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) * A) The number of objects from per cpu partial slabs dumped to the * per node list when we reach the limit. * B) The number of objects in cpu partial slabs to extract from the - * per node list when we run out of per cpu objects. We only fetch 50% - * to keep some capacity around for frees. + * per node list when we run out of per cpu objects. 
We only fetch + * 50% to keep some capacity around for frees. */ if (!kmem_cache_has_cpu_partial(s)) s->cpu_partial = 0; @@ -3099,8 +3106,8 @@ error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " "order=%u offset=%u flags=%lx\n", - s->name, (unsigned long)s->size, s->size, oo_order(s->oo), - s->offset, flags); + s->name, (unsigned long)s->size, s->size, + oo_order(s->oo), s->offset, flags); return -EINVAL; } @@ -3316,42 +3323,6 @@ size_t ksize(const void *object) } EXPORT_SYMBOL(ksize); -#ifdef CONFIG_SLUB_DEBUG -bool verify_mem_not_deleted(const void *x) -{ - struct page *page; - void *object = (void *)x; - unsigned long flags; - bool rv; - - if (unlikely(ZERO_OR_NULL_PTR(x))) - return false; - - local_irq_save(flags); - - page = virt_to_head_page(x); - if (unlikely(!PageSlab(page))) { - /* maybe it was from stack? */ - rv = true; - goto out_unlock; - } - - slab_lock(page); - if (on_freelist(page->slab_cache, page, object)) { - object_err(page->slab_cache, page, object, "Object is on free-list"); - rv = false; - } else { - rv = true; - } - slab_unlock(page); - -out_unlock: - local_irq_restore(flags); - return rv; -} -EXPORT_SYMBOL(verify_mem_not_deleted); -#endif - void kfree(const void *x) { struct page *page; @@ -4162,15 +4133,17 @@ static int list_locations(struct kmem_cache *s, char *buf, !cpumask_empty(to_cpumask(l->cpus)) && len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); - len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, + len += cpulist_scnprintf(buf + len, + PAGE_SIZE - len - 50, to_cpumask(l->cpus)); } if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && len < PAGE_SIZE - 60) { len += sprintf(buf + len, " nodes="); - len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->nodes); + len += nodelist_scnprintf(buf + len, + PAGE_SIZE - len - 50, + l->nodes); } len += sprintf(buf + len, "\n"); @@ -4268,18 +4241,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s, int node; int x; unsigned 
long *nodes; - unsigned long *per_cpu; - nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); + nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); if (!nodes) return -ENOMEM; - per_cpu = nodes + nr_node_ids; if (flags & SO_CPU) { int cpu; for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, + cpu); int node; struct page *page; @@ -4304,8 +4276,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s, total += x; nodes[node] += x; } - - per_cpu[node]++; } } @@ -4315,12 +4285,11 @@ static ssize_t show_slab_objects(struct kmem_cache *s, for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); - if (flags & SO_TOTAL) - x = atomic_long_read(&n->total_objects); - else if (flags & SO_OBJECTS) - x = atomic_long_read(&n->total_objects) - - count_partial(n, count_free); - + if (flags & SO_TOTAL) + x = atomic_long_read(&n->total_objects); + else if (flags & SO_OBJECTS) + x = atomic_long_read(&n->total_objects) - + count_partial(n, count_free); else x = atomic_long_read(&n->nr_slabs); total += x; @@ -5136,7 +5105,8 @@ static char *create_unique_id(struct kmem_cache *s) #ifdef CONFIG_MEMCG_KMEM if (!is_root_cache(s)) - p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg)); + p += sprintf(p, "-%08d", + memcg_cache_id(s->memcg_params->memcg)); #endif BUG_ON(p > name + ID_STR_LENGTH - 1); diff --git a/mm/swap.c b/mm/swap.c index c899502d3e36..759c3caf44bd 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -432,6 +432,11 @@ static void activate_page_drain(int cpu) pagevec_lru_move_fn(pvec, __activate_page, NULL); } +static bool need_activate_page_drain(int cpu) +{ + return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0; +} + void activate_page(struct page *page) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { @@ -449,6 +454,11 @@ static inline void activate_page_drain(int cpu) { } +static bool 
need_activate_page_drain(int cpu) +{ + return false; +} + void activate_page(struct page *page) { struct zone *zone = page_zone(page); @@ -701,12 +711,36 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) lru_add_drain(); } -/* - * Returns 0 for success - */ -int lru_add_drain_all(void) +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); + +void lru_add_drain_all(void) { - return schedule_on_each_cpu(lru_add_drain_per_cpu); + static DEFINE_MUTEX(lock); + static struct cpumask has_work; + int cpu; + + mutex_lock(&lock); + get_online_cpus(); + cpumask_clear(&has_work); + + for_each_online_cpu(cpu) { + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); + + if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || + pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || + pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || + need_activate_page_drain(cpu)) { + INIT_WORK(work, lru_add_drain_per_cpu); + schedule_work_on(cpu, work); + cpumask_set_cpu(cpu, &has_work); + } + } + + for_each_cpu(cpu, &has_work) + flush_work(&per_cpu(lru_add_drain_work, cpu)); + + put_online_cpus(); + mutex_unlock(&lock); } /* diff --git a/mm/truncate.c b/mm/truncate.c index e2e8a8a7eb9d..353b683afd6e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -567,7 +567,6 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); /** * truncate_pagecache - unmap and remove pagecache that has been truncated * @inode: inode - * @oldsize: old file size * @newsize: new file size * * inode's new i_size must already be written before truncate_pagecache @@ -580,7 +579,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. 
*/ -void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize) +void truncate_pagecache(struct inode *inode, loff_t newsize) { struct address_space *mapping = inode->i_mapping; loff_t holebegin = round_up(newsize, PAGE_SIZE); @@ -614,12 +613,8 @@ EXPORT_SYMBOL(truncate_pagecache); */ void truncate_setsize(struct inode *inode, loff_t newsize) { - loff_t oldsize; - - oldsize = inode->i_size; i_size_write(inode, newsize); - - truncate_pagecache(inode, oldsize, newsize); + truncate_pagecache(inode, newsize); } EXPORT_SYMBOL(truncate_setsize); diff --git a/mm/vmscan.c b/mm/vmscan.c index fe715daeb8bc..8ed1b775bdc9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -139,11 +139,23 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } + +static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) +{ + struct mem_cgroup *root = sc->target_mem_cgroup; + return !mem_cgroup_disabled() && + mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE; +} #else static bool global_reclaim(struct scan_control *sc) { return true; } + +static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) +{ + return false; +} #endif unsigned long zone_reclaimable_pages(struct zone *zone) @@ -174,14 +186,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) } /* - * Add a shrinker callback to be called from the vm + * Add a shrinker callback to be called from the vm. */ -void register_shrinker(struct shrinker *shrinker) +int register_shrinker(struct shrinker *shrinker) { - atomic_long_set(&shrinker->nr_in_batch, 0); + size_t size = sizeof(*shrinker->nr_deferred); + + /* + * If we only have one possible node in the system anyway, save + * ourselves the trouble and disable NUMA aware behavior. This way we + * will save memory and some small loop time later. 
+ */ + if (nr_node_ids == 1) + shrinker->flags &= ~SHRINKER_NUMA_AWARE; + + if (shrinker->flags & SHRINKER_NUMA_AWARE) + size *= nr_node_ids; + + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + return -ENOMEM; + down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); up_write(&shrinker_rwsem); + return 0; } EXPORT_SYMBOL(register_shrinker); @@ -196,15 +225,102 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); -static inline int do_shrinker_shrink(struct shrinker *shrinker, - struct shrink_control *sc, - unsigned long nr_to_scan) -{ - sc->nr_to_scan = nr_to_scan; - return (*shrinker->shrink)(shrinker, sc); +#define SHRINK_BATCH 128 + +static unsigned long +shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, + unsigned long nr_pages_scanned, unsigned long lru_pages) +{ + unsigned long freed = 0; + unsigned long long delta; + long total_scan; + long max_pass; + long nr; + long new_nr; + int nid = shrinkctl->nid; + long batch_size = shrinker->batch ? shrinker->batch + : SHRINK_BATCH; + + max_pass = shrinker->count_objects(shrinker, shrinkctl); + if (max_pass == 0) + return 0; + + /* + * copy the current shrinker scan count into a local variable + * and zero it so that other concurrent shrinker invocations + * don't also do this scanning work. + */ + nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); + + total_scan = nr; + delta = (4 * nr_pages_scanned) / shrinker->seeks; + delta *= max_pass; + do_div(delta, lru_pages + 1); + total_scan += delta; + if (total_scan < 0) { + printk(KERN_ERR + "shrink_slab: %pF negative objects to delete nr=%ld\n", + shrinker->scan_objects, total_scan); + total_scan = max_pass; + } + + /* + * We need to avoid excessive windup on filesystem shrinkers + * due to large numbers of GFP_NOFS allocations causing the + * shrinkers to return -1 all the time. 
This results in a large + * nr being built up so when a shrink that can do some work + * comes along it empties the entire cache due to nr >>> + * max_pass. This is bad for sustaining a working set in + * memory. + * + * Hence only allow the shrinker to scan the entire cache when + * a large delta change is calculated directly. + */ + if (delta < max_pass / 4) + total_scan = min(total_scan, max_pass / 2); + + /* + * Avoid risking looping forever due to too large nr value: + * never try to free more than twice the estimate number of + * freeable entries. + */ + if (total_scan > max_pass * 2) + total_scan = max_pass * 2; + + trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, + nr_pages_scanned, lru_pages, + max_pass, delta, total_scan); + + while (total_scan >= batch_size) { + unsigned long ret; + + shrinkctl->nr_to_scan = batch_size; + ret = shrinker->scan_objects(shrinker, shrinkctl); + if (ret == SHRINK_STOP) + break; + freed += ret; + + count_vm_events(SLABS_SCANNED, batch_size); + total_scan -= batch_size; + + cond_resched(); + } + + /* + * move the unused scan count back into the shrinker in a + * manner that handles concurrent updates. If we exhausted the + * scan, there is no need to do an update. + */ + if (total_scan > 0) + new_nr = atomic_long_add_return(total_scan, + &shrinker->nr_deferred[nid]); + else + new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); + + trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr); + return freed; } -#define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches * @@ -224,115 +340,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker, * * Returns the number of slab objects which we shrunk. 
*/ -unsigned long shrink_slab(struct shrink_control *shrink, +unsigned long shrink_slab(struct shrink_control *shrinkctl, unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; - unsigned long ret = 0; + unsigned long freed = 0; if (nr_pages_scanned == 0) nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { - /* Assume we'll be able to shrink next time */ - ret = 1; + /* + * If we would return 0, our callers would understand that we + * have nothing else to shrink and give up trying. By returning + * 1 we keep it going and assume we'll be able to shrink next + * time. + */ + freed = 1; goto out; } list_for_each_entry(shrinker, &shrinker_list, list) { - unsigned long long delta; - long total_scan; - long max_pass; - int shrink_ret = 0; - long nr; - long new_nr; - long batch_size = shrinker->batch ? shrinker->batch - : SHRINK_BATCH; - - max_pass = do_shrinker_shrink(shrinker, shrink, 0); - if (max_pass <= 0) - continue; - - /* - * copy the current shrinker scan count into a local variable - * and zero it so that other concurrent shrinker invocations - * don't also do this scanning work. - */ - nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); - - total_scan = nr; - delta = (4 * nr_pages_scanned) / shrinker->seeks; - delta *= max_pass; - do_div(delta, lru_pages + 1); - total_scan += delta; - if (total_scan < 0) { - printk(KERN_ERR "shrink_slab: %pF negative objects to " - "delete nr=%ld\n", - shrinker->shrink, total_scan); - total_scan = max_pass; - } - - /* - * We need to avoid excessive windup on filesystem shrinkers - * due to large numbers of GFP_NOFS allocations causing the - * shrinkers to return -1 all the time. This results in a large - * nr being built up so when a shrink that can do some work - * comes along it empties the entire cache due to nr >>> - * max_pass. This is bad for sustaining a working set in - * memory. 
- * - * Hence only allow the shrinker to scan the entire cache when - * a large delta change is calculated directly. - */ - if (delta < max_pass / 4) - total_scan = min(total_scan, max_pass / 2); - - /* - * Avoid risking looping forever due to too large nr value: - * never try to free more than twice the estimate number of - * freeable entries. - */ - if (total_scan > max_pass * 2) - total_scan = max_pass * 2; - - trace_mm_shrink_slab_start(shrinker, shrink, nr, - nr_pages_scanned, lru_pages, - max_pass, delta, total_scan); - - while (total_scan >= batch_size) { - int nr_before; + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (!node_online(shrinkctl->nid)) + continue; - nr_before = do_shrinker_shrink(shrinker, shrink, 0); - shrink_ret = do_shrinker_shrink(shrinker, shrink, - batch_size); - if (shrink_ret == -1) + if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && + (shrinkctl->nid != 0)) break; - if (shrink_ret < nr_before) - ret += nr_before - shrink_ret; - count_vm_events(SLABS_SCANNED, batch_size); - total_scan -= batch_size; - cond_resched(); - } + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); - /* - * move the unused scan count back into the shrinker in a - * manner that handles concurrent updates. If we exhausted the - * scan, there is no need to do an update. 
- */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, - &shrinker->nr_in_batch); - else - new_nr = atomic_long_read(&shrinker->nr_in_batch); - - trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); + } } up_read(&shrinker_rwsem); out: cond_resched(); - return ret; + return freed; } static inline int is_page_cache_freeable(struct page *page) @@ -2130,9 +2176,11 @@ static inline bool should_continue_reclaim(struct zone *zone, } } -static void shrink_zone(struct zone *zone, struct scan_control *sc) +static int +__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) { unsigned long nr_reclaimed, nr_scanned; + int groups_scanned = 0; do { struct mem_cgroup *root = sc->target_mem_cgroup; @@ -2140,15 +2188,17 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) .zone = zone, .priority = sc->priority, }; - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = NULL; + mem_cgroup_iter_filter filter = (soft_reclaim) ? + mem_cgroup_soft_reclaim_eligible : NULL; nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - memcg = mem_cgroup_iter(root, NULL, &reclaim); - do { + while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { struct lruvec *lruvec; + groups_scanned++; lruvec = mem_cgroup_zone_lruvec(zone, memcg); shrink_lruvec(lruvec, sc); @@ -2168,8 +2218,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) mem_cgroup_iter_break(root, memcg); break; } - memcg = mem_cgroup_iter(root, memcg, &reclaim); - } while (memcg); + } vmpressure(sc->gfp_mask, sc->target_mem_cgroup, sc->nr_scanned - nr_scanned, @@ -2177,6 +2226,37 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); + + return groups_scanned; +} + + +static void shrink_zone(struct zone *zone, struct scan_control *sc) +{ + bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); + unsigned 
long nr_scanned = sc->nr_scanned; + int scanned_groups; + + scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim); + /* + * memcg iterator might race with other reclaimer or start from + * a incomplete tree walk so the tree walk in __shrink_zone + * might have missed groups that are above the soft limit. Try + * another loop to catch up with others. Do it just once to + * prevent from reclaim latencies when other reclaimers always + * preempt this one. + */ + if (do_soft_reclaim && !scanned_groups) + __shrink_zone(zone, sc, do_soft_reclaim); + + /* + * No group is over the soft limit or those that are do not have + * pages in the zone we are reclaiming so we have to reclaim everybody + */ + if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) { + __shrink_zone(zone, sc, false); + return; + } } /* Returns true if compaction should go ahead for a high-order request */ @@ -2240,8 +2320,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - unsigned long nr_soft_reclaimed; - unsigned long nr_soft_scanned; bool aborted_reclaim = false; /* @@ -2281,18 +2359,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) continue; } } - /* - * This steals pages from memory cgroups over softlimit - * and returns the number of reclaimed pages and - * scanned pages. This works for global memory pressure - * and balancing, not for a memcg's limit. 
- */ - nr_soft_scanned = 0; - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - sc->order, sc->gfp_mask, - &nr_soft_scanned); - sc->nr_reclaimed += nr_soft_reclaimed; - sc->nr_scanned += nr_soft_scanned; /* need some check for avoid more shrink_zone() */ } @@ -2368,12 +2434,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, */ if (global_reclaim(sc)) { unsigned long lru_pages = 0; + + nodes_clear(shrink->nodes_to_scan); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(sc->gfp_mask)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; lru_pages += zone_reclaimable_pages(zone); + node_set(zone_to_nid(zone), + shrink->nodes_to_scan); } shrink_slab(shrink, sc->nr_scanned, lru_pages); @@ -2829,6 +2899,8 @@ static bool kswapd_shrink_zone(struct zone *zone, return true; shrink_zone(zone, sc); + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); reclaim_state->reclaimed_slab = 0; shrink_slab(&shrink, sc->nr_scanned, lru_pages); @@ -2880,8 +2952,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ - unsigned long nr_soft_reclaimed; - unsigned long nr_soft_scanned; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .priority = DEF_PRIORITY, @@ -2996,15 +3066,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, sc.nr_scanned = 0; - nr_soft_scanned = 0; - /* - * Call soft limit reclaim before calling shrink_zone. - */ - nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, - order, sc.gfp_mask, - &nr_soft_scanned); - sc.nr_reclaimed += nr_soft_reclaimed; - /* * There should be no need to raise the scanning * priority if enough pages are already being scanned @@ -3520,10 +3581,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * number of slab pages and shake the slab until it is reduced * by the same nr_pages that we used for reclaiming unmapped * pages. 
- * - * Note that shrink_slab will free memory on all zones and may - * take a long time. */ + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); for (;;) { unsigned long lru_pages = zone_reclaimable_pages(zone); diff --git a/net/Kconfig b/net/Kconfig index ee0213667272..b50dacc072f0 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -228,7 +228,7 @@ config RPS config RFS_ACCEL boolean - depends on RPS && GENERIC_HARDIRQS + depends on RPS select CPU_RMAP default y diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1606f740d6ae..2b4b32aaa893 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2216,6 +2216,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) EXPORT_SYMBOL(ceph_osdc_sync); /* + * Call all pending notify callbacks - for use after a watch is + * unregistered, to make sure no more callbacks for it will be invoked + */ +extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) +{ + flush_workqueue(osdc->notify_wq); +} +EXPORT_SYMBOL(ceph_osdc_flush_notifies); + + +/* * init, shutdown */ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 8a57d79b0b16..559d4ae6ebf4 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (!cg_proto) return -EINVAL; - if (val > RESOURCE_MAX) - val = RESOURCE_MAX; + if (val > RES_COUNTER_MAX) + val = RES_COUNTER_MAX; tcp = tcp_from_cgproto(cg_proto); @@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, net->ipv4.sysctl_tcp_mem[i]); - if (val == RESOURCE_MAX) + if (val == RES_COUNTER_MAX) clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); - else if (val != RESOURCE_MAX) { + else if (val != RES_COUNTER_MAX) { /* * The active bit needs to be written after the static_key * update. 
This is what guarantees that the socket activation @@ -187,7 +187,7 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) switch (cft->private) { case RES_LIMIT: - val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); + val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); break; case RES_USAGE: val = tcp_read_usage(memcg); diff --git a/net/socket.c b/net/socket.c index 0ceaa5cb9ead..ebed4b68f768 100644 --- a/net/socket.c +++ b/net/socket.c @@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_recvmsg); -static void sock_aio_dtor(struct kiocb *iocb) -{ - kfree(iocb->private); -} - static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) { @@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { - if (!is_sync_kiocb(iocb)) { - siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); - if (!siocb) - return NULL; - iocb->ki_dtor = sock_aio_dtor; - } + if (!is_sync_kiocb(iocb)) + BUG(); siocb->kiocb = iocb; iocb->private = siocb; @@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (pos != 0) return -ESPIPE; - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ + if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 415159061cd0..5285ead196c0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); /* * Remove stale credentials. Avoid sleeping inside the loop. 
*/ -static int +static long rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred, *next; unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; + long freed = 0; list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { @@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) */ if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - return 0; + break; list_del_init(&cred->cr_lru); number_cred_unused--; + freed++; if (atomic_read(&cred->cr_count) != 0) continue; @@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) } spin_unlock(cache_lock); } - return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + return freed; } /* * Run memory cache shrinker. */ -static int -rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) + { LIST_HEAD(free); - int res; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed; + + if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return SHRINK_STOP; - if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 
0 : -1; + /* nothing left, don't come back */ if (list_empty(&cred_unused)) - return 0; + return SHRINK_STOP; + spin_lock(&rpc_credcache_lock); - res = rpcauth_prune_expired(&free, nr_to_scan); + freed = rpcauth_prune_expired(&free, sc->nr_to_scan); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); - return res; + + return freed; +} + +static unsigned long +rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) + +{ + return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } /* @@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task) } static struct shrinker rpc_cred_shrinker = { - .shrink = rpcauth_cache_shrinker, + .count_objects = rpcauth_cache_shrink_count, + .scan_objects = rpcauth_cache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f6d84be49050..ed04869b2d4f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -239,7 +239,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags)) dprintk("RPC: UID %d Credential key reset\n", - tcred->cr_uid); + from_kuid(&init_user_ns, tcred->cr_uid)); /* set up fasttrack for the normal case */ set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 30eb502135bb..fcac5d141717 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -75,7 +75,7 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -static DEFINE_HASHTABLE(gss_auth_hash_table, 16); +static DEFINE_HASHTABLE(gss_auth_hash_table, 4); static DEFINE_SPINLOCK(gss_auth_hash_lock); struct gss_pipe { diff --git a/scripts/config b/scripts/config index 2283be2bb62c..68041793698c 100755 --- a/scripts/config +++ b/scripts/config @@ -82,7 +82,7 @@ txt_subst() { local 
infile="$3" local tmpfile="$infile.swp" - sed -e "s/$before/$after/" "$infile" >"$tmpfile" + sed -e "s:$before:$after:" "$infile" >"$tmpfile" # replace original file with the edited one mv "$tmpfile" "$infile" } diff --git a/scripts/package/buildtar b/scripts/package/buildtar index cdd9bb909bcd..aa22f9447ddc 100644 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -87,6 +87,27 @@ case "${ARCH}" in [ -f "${objtree}/vmlinux.SYS" ] && cp -v -- "${objtree}/vmlinux.SYS" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.SYS" [ -f "${objtree}/vmlinux.dsk" ] && cp -v -- "${objtree}/vmlinux.dsk" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.dsk" ;; + mips) + if [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.bin" ]; then + cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.bin" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" + elif [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.ecoff" ]; then + cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.ecoff" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" + elif [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.srec" ]; then + cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.srec" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}" + elif [ -f "${objtree}/vmlinux.32" ]; then + cp -v -- "${objtree}/vmlinux.32" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + elif [ -f "${objtree}/vmlinux.64" ]; then + cp -v -- "${objtree}/vmlinux.64" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + elif [ -f "${objtree}/arch/mips/boot/vmlinux.bin" ]; then + cp -v -- "${objtree}/arch/mips/boot/vmlinux.bin" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + elif [ -f "${objtree}/arch/mips/boot/vmlinux.ecoff" ]; then + cp -v -- "${objtree}/arch/mips/boot/vmlinux.ecoff" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + elif [ -f "${objtree}/arch/mips/boot/vmlinux.srec" ]; then + cp -v -- "${objtree}/arch/mips/boot/vmlinux.srec" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + elif [ -f "${objtree}/vmlinux" ]; then + cp -v -- "${objtree}/vmlinux" 
"${tmpdir}/boot/vmlinux-${KERNELRELEASE}" + fi + ;; *) [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-kbuild-${KERNELRELEASE}" echo "" >&2 diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e54ebd530849..6e61a019aa5e 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -3428,6 +3428,7 @@ static struct snd_pci_quirk msi_black_list[] = { SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */ SND_PCI_QUIRK(0x1043, 0x822d, "ASUS", 0), /* Athlon64 X2 + nvidia MCP55 */ + SND_PCI_QUIRK(0x1179, 0xfb44, "Toshiba Satellite C870", 0), /* AMD Hudson */ SND_PCI_QUIRK(0x1849, 0x0888, "ASRock", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0xa0a0, 0x0575, "Aopen MZ915-M", 0), /* ICH6 */ {} diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index cccaf9c7a7bb..b524f89a1f13 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -169,7 +169,7 @@ static void cs_automute(struct hda_codec *codec) snd_hda_gen_update_outputs(codec); - if (spec->gpio_eapd_hp) { + if (spec->gpio_eapd_hp || spec->gpio_eapd_speaker) { spec->gpio_data = spec->gen.hp_jack_present ? 
spec->gpio_eapd_hp : spec->gpio_eapd_speaker; snd_hda_codec_write(codec, 0x01, 0, @@ -291,10 +291,11 @@ static int cs_init(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; - /* init_verb sequence for C0/C1/C2 errata*/ - snd_hda_sequence_write(codec, cs_errata_init_verbs); - - snd_hda_sequence_write(codec, cs_coef_init_verbs); + if (spec->vendor_nid == CS420X_VENDOR_NID) { + /* init_verb sequence for C0/C1/C2 errata*/ + snd_hda_sequence_write(codec, cs_errata_init_verbs); + snd_hda_sequence_write(codec, cs_coef_init_verbs); + } snd_hda_gen_init(codec); @@ -307,8 +308,10 @@ static int cs_init(struct hda_codec *codec) spec->gpio_data); } - init_input_coef(codec); - init_digital_coef(codec); + if (spec->vendor_nid == CS420X_VENDOR_NID) { + init_input_coef(codec); + init_digital_coef(codec); + } return 0; } @@ -552,6 +555,76 @@ static int patch_cs420x(struct hda_codec *codec) } /* + * CS4208 support: + * Its layout is no longer compatible with CS4206/CS4207, and the generic + * parser seems working fairly well, except for trivial fixups. 
+ */ +enum { + CS4208_GPIO0, +}; + +static const struct hda_model_fixup cs4208_models[] = { + { .id = CS4208_GPIO0, .name = "gpio0" }, + {} +}; + +static const struct snd_pci_quirk cs4208_fixup_tbl[] = { + /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x7100, "MacBookPro 6,1", CS4208_GPIO0), + SND_PCI_QUIRK(0x106b, 0x7200, "MacBookPro 6,2", CS4208_GPIO0), + {} /* terminator */ +}; + +static void cs4208_fixup_gpio0(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + struct cs_spec *spec = codec->spec; + spec->gpio_eapd_hp = 0; + spec->gpio_eapd_speaker = 1; + spec->gpio_mask = spec->gpio_dir = + spec->gpio_eapd_hp | spec->gpio_eapd_speaker; + } +} + +static const struct hda_fixup cs4208_fixups[] = { + [CS4208_GPIO0] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs4208_fixup_gpio0, + }, +}; + +static int patch_cs4208(struct hda_codec *codec) +{ + struct cs_spec *spec; + int err; + + spec = cs_alloc_spec(codec, 0); /* no specific w/a */ + if (!spec) + return -ENOMEM; + + spec->gen.automute_hook = cs_automute; + + snd_hda_pick_fixup(codec, cs4208_models, cs4208_fixup_tbl, + cs4208_fixups); + snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE); + + err = cs_parse_auto_config(codec); + if (err < 0) + goto error; + + codec->patch_ops = cs_patch_ops; + + snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PROBE); + + return 0; + + error: + cs_free(codec); + return err; +} + +/* * Cirrus Logic CS4210 * * 1 DAC => HP(sense) / Speakers, @@ -991,6 +1064,7 @@ static int patch_cs4213(struct hda_codec *codec) static const struct hda_codec_preset snd_hda_preset_cirrus[] = { { .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x }, { .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x }, + { .id = 0x10134208, .name = "CS4208", .patch = patch_cs4208 }, { .id = 0x10134210, .name = "CS4210", .patch = patch_cs4210 }, { .id = 0x10134213, .name = "CS4213", .patch = patch_cs4213 }, {} /* terminator */ @@ -998,6 +1072,7 @@ static const 
struct hda_codec_preset snd_hda_preset_cirrus[] = { MODULE_ALIAS("snd-hda-codec-id:10134206"); MODULE_ALIAS("snd-hda-codec-id:10134207"); +MODULE_ALIAS("snd-hda-codec-id:10134208"); MODULE_ALIAS("snd-hda-codec-id:10134210"); MODULE_ALIAS("snd-hda-codec-id:10134213"); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 9a58893d52a7..3d8cd04455a6 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -44,6 +44,8 @@ static bool static_hdmi_pcm; module_param(static_hdmi_pcm, bool, 0644); MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); +#define is_haswell(codec) ((codec)->vendor_id == 0x80862807) + struct hdmi_spec_per_cvt { hda_nid_t cvt_nid; int assigned; @@ -894,6 +896,11 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, if (!channels) return; + if (is_haswell(codec)) + snd_hda_codec_write(codec, pin_nid, 0, + AC_VERB_SET_AMP_GAIN_MUTE, + AMP_OUT_UNMUTE); + eld = &per_pin->sink_eld; if (!eld->monitor_present) return; @@ -1033,10 +1040,10 @@ static void hdmi_unsol_event(struct hda_codec *codec, unsigned int res) hdmi_non_intrinsic_event(codec, res); } -static void haswell_verify_pin_D0(struct hda_codec *codec, +static void haswell_verify_D0(struct hda_codec *codec, hda_nid_t cvt_nid, hda_nid_t nid) { - int pwr, lamp, ramp; + int pwr; /* For Haswell, the converter 1/2 may keep in D3 state after bootup, * thus pins could only choose converter 0 for use. 
Make sure the @@ -1052,25 +1059,6 @@ static void haswell_verify_pin_D0(struct hda_codec *codec, pwr = (pwr & AC_PWRST_ACTUAL) >> AC_PWRST_ACTUAL_SHIFT; snd_printd("Haswell HDMI audio: Power for pin 0x%x is now D%d\n", nid, pwr); } - - lamp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT); - ramp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT); - if (lamp != ramp) { - snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, - AC_AMP_SET_RIGHT | AC_AMP_SET_OUTPUT | lamp); - - lamp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT); - ramp = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_AMP_GAIN_MUTE, - AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT); - snd_printd("Haswell HDMI audio: Mute after set on pin 0x%x: [0x%x 0x%x]\n", nid, lamp, ramp); - } } /* @@ -1087,8 +1075,8 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, int pinctl; int new_pinctl = 0; - if (codec->vendor_id == 0x80862807) - haswell_verify_pin_D0(codec, cvt_nid, pin_nid); + if (is_haswell(codec)) + haswell_verify_D0(codec, cvt_nid, pin_nid); if (snd_hda_query_pin_caps(codec, pin_nid) & AC_PINCAP_HBR) { pinctl = snd_hda_codec_read(codec, pin_nid, 0, @@ -1227,7 +1215,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo, mux_idx); /* configure unused pins to choose other converters */ - if (codec->vendor_id == 0x80862807) + if (is_haswell(codec)) haswell_config_cvts(codec, pin_idx, mux_idx); snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid); @@ -1358,14 +1346,10 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) /* Haswell-specific workaround: re-setup when the transcoder is * changed during the stream playback */ - if (codec->vendor_id == 0x80862807 && - eld->eld_valid && !old_eld_valid && per_pin->setup) { - snd_hda_codec_write(codec, pin_nid, 0, - 
AC_VERB_SET_AMP_GAIN_MUTE, - AMP_OUT_UNMUTE); + if (is_haswell(codec) && + eld->eld_valid && !old_eld_valid && per_pin->setup) hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm); - } } mutex_unlock(&pin_eld->lock); @@ -1405,7 +1389,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid) if (get_defcfg_connect(config) == AC_JACK_PORT_NONE) return 0; - if (codec->vendor_id == 0x80862807) + if (is_haswell(codec)) intel_haswell_fixup_connect_list(codec, pin_nid); pin_idx = spec->num_pins; @@ -2014,7 +1998,7 @@ static int patch_generic_hdmi(struct hda_codec *codec) codec->spec = spec; hdmi_array_init(spec, 4); - if (codec->vendor_id == 0x80862807) { + if (is_haswell(codec)) { intel_haswell_enable_all_pins(codec, true); intel_haswell_fixup_enable_dp12(codec); } @@ -2025,7 +2009,7 @@ static int patch_generic_hdmi(struct hda_codec *codec) return -EINVAL; } codec->patch_ops = generic_hdmi_patch_ops; - if (codec->vendor_id == 0x80862807) { + if (is_haswell(codec)) { codec->patch_ops.set_power_state = haswell_set_power_state; codec->dp_mst = true; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9e9378cde8fa..bc07d369fac4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3443,6 +3443,56 @@ static void alc283_fixup_chromebook(struct hda_codec *codec, } } +/* mute tablet speaker pin (0x14) via dock plugging in addition */ +static void asus_tx300_automute(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + snd_hda_gen_update_outputs(codec); + if (snd_hda_jack_detect(codec, 0x1b)) + spec->gen.mute_bits |= (1ULL << 0x14); +} + +static void alc282_fixup_asus_tx300(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + /* TX300 needs to set up GPIO2 for the speaker amp */ + static const struct hda_verb gpio2_verbs[] = { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 }, + { 0x01, 
AC_VERB_SET_GPIO_DATA, 0x04 }, + {} + }; + static const struct hda_pintbl dock_pins[] = { + { 0x1b, 0x21114000 }, /* dock speaker pin */ + {} + }; + struct snd_kcontrol *kctl; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_add_verbs(codec, gpio2_verbs); + snd_hda_apply_pincfgs(codec, dock_pins); + spec->gen.auto_mute_via_amp = 1; + spec->gen.automute_hook = asus_tx300_automute; + snd_hda_jack_detect_enable_callback(codec, 0x1b, + HDA_GEN_HP_EVENT, + snd_hda_gen_hp_automute); + break; + case HDA_FIXUP_ACT_BUILD: + /* this is a bit tricky; give more sane names for the main + * (tablet) speaker and the dock speaker, respectively + */ + kctl = snd_hda_find_mixer_ctl(codec, "Speaker Playback Switch"); + if (kctl) + strcpy(kctl->id.name, "Dock Speaker Playback Switch"); + kctl = snd_hda_find_mixer_ctl(codec, "Bass Speaker Playback Switch"); + if (kctl) + strcpy(kctl->id.name, "Speaker Playback Switch"); + break; + } +} + enum { ALC269_FIXUP_SONY_VAIO, ALC275_FIXUP_SONY_VAIO_GPIO2, @@ -3480,6 +3530,7 @@ enum { ALC269_FIXUP_LIMIT_INT_MIC_BOOST, ALC269VB_FIXUP_ORDISSIMO_EVE2, ALC283_FIXUP_CHROME_BOOK, + ALC282_FIXUP_ASUS_TX300, }; static const struct hda_fixup alc269_fixups[] = { @@ -3735,6 +3786,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc283_fixup_chromebook, }, + [ALC282_FIXUP_ASUS_TX300] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc282_fixup_asus_tx300, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -3784,6 +3839,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x21ed, "HP Falco Chromebook", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", 
ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_DMIC), diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 0ecf356027f6..bb53dea85b17 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -649,7 +649,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream, dma_params = ssc_p->dma_params[dir]; ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_enable); - ssc_writel(ssc_p->ssc->regs, IER, dma_params->mask->ssc_error); + ssc_writel(ssc_p->ssc->regs, IDR, dma_params->mask->ssc_error); pr_debug("%s enabled SSC_SR=0x%08x\n", dir ? "receive" : "transmit", diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 15106c045478..b33b45dfceec 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -107,7 +107,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_WM8782 select SND_SOC_WM8804 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8900 if I2C - select SND_SOC_WM8903 if I2C && GENERIC_HARDIRQS + select SND_SOC_WM8903 if I2C select SND_SOC_WM8904 if I2C select SND_SOC_WM8940 if I2C select SND_SOC_WM8955 if I2C diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c index 4d3c8fd8c5db..ea141e1d6f28 100644 --- a/sound/soc/codecs/mc13783.c +++ b/sound/soc/codecs/mc13783.c @@ -125,6 +125,10 @@ static int mc13783_write(struct snd_soc_codec *codec, ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + /* include errata fix for spi audio problems */ + if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC) + ret = mc13xxx_reg_write(priv->mc13xxx, reg, value); + mc13xxx_unlock(priv->mc13xxx); return ret; diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 704e246f5b1e..b7ab71f2ccc1 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -198,6 +198,7 @@ config SND_SOC_IMX_SPDIF select SND_SOC_IMX_PCM_DMA select SND_SOC_FSL_SPDIF select SND_SOC_SPDIF + select REGMAP_MMIO help SoC Audio support 
for i.MX boards with S/PDIF Say Y if you want to add support for SoC audio on an i.MX board with diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index ab17381cc981..d3bf71a0ec56 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -335,7 +335,8 @@ static int imx_audmux_probe(struct platform_device *pdev) if (audmux_type == IMX31_AUDMUX) audmux_debugfs_init(); - imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node); + if (of_id) + imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node); return 0; } diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 7fce340ab3ef..0f3d73d4ef48 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -559,7 +559,8 @@ static int kirkwood_i2s_dev_remove(struct platform_device *pdev) #ifdef CONFIG_OF static struct of_device_id mvebu_audio_of_match[] = { - { .compatible = "marvell,mvebu-audio" }, + { .compatible = "marvell,kirkwood-audio" }, + { .compatible = "marvell,dove-audio" }, { } }; MODULE_DEVICE_TABLE(of, mvebu_audio_of_match); diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index 9855dfc3e3ec..2eea1840315d 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -63,7 +63,7 @@ config SND_SOC_SAMSUNG_SMDK_WM8580 config SND_SOC_SAMSUNG_SMDK_WM8994 tristate "SoC I2S Audio support for WM8994 on SMDK" depends on SND_SOC_SAMSUNG - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_WM8994 select SND_SOC_WM8994 select SND_SAMSUNG_I2S @@ -151,7 +151,7 @@ config SND_SOC_SMARTQ config SND_SOC_GONI_AQUILA_WM8994 tristate "SoC I2S Audio support for AQUILA/GONI - WM8994" depends on SND_SOC_SAMSUNG && (MACH_GONI || MACH_AQUILA) - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select SND_SAMSUNG_I2S select MFD_WM8994 select SND_SOC_WM8994 @@ -177,7 +177,7 @@ config SND_SOC_SMDK_WM8580_PCM config SND_SOC_SMDK_WM8994_PCM tristate "SoC PCM Audio support for WM8994 on SMDK" 
depends on SND_SOC_SAMSUNG - depends on I2C=y && GENERIC_HARDIRQS + depends on I2C=y select MFD_WM8994 select SND_SOC_WM8994 select SND_SAMSUNG_PCM diff --git a/sound/soc/sh/rcar/scu.c b/sound/soc/sh/rcar/scu.c index 184d9008cecd..2df2e9150b89 100644 --- a/sound/soc/sh/rcar/scu.c +++ b/sound/soc/sh/rcar/scu.c @@ -157,9 +157,9 @@ static int rsnd_scu_start(struct rsnd_mod *mod, int ret; /* - * SCU will be used if it has RSND_SCU_USB_HPBIF flags + * SCU will be used if it has RSND_SCU_USE_HPBIF flags */ - if (!(flags & RSND_SCU_USB_HPBIF)) { + if (!(flags & RSND_SCU_USE_HPBIF)) { /* it use PIO transter */ dev_dbg(dev, "%s%d is not used\n", rsnd_mod_name(mod), rsnd_mod_id(mod)); diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c5dc1ad1b8d7..3a0ff7fb71b6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -394,6 +394,8 @@ ifeq ($(ARCH),x86) LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o endif LIB_OBJS += $(OUTPUT)tests/code-reading.o +LIB_OBJS += $(OUTPUT)tests/sample-parsing.o +LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o @@ -439,7 +441,6 @@ PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT) ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o ifdef NO_LIBELF EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f988d380c52f..5ebd0c3b71b6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -277,6 +277,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 9b336fdb6f71..423875c999b2 100644 --- 
a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -123,6 +123,19 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +static int perf_event__repipe_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err; + + err = perf_event__process_mmap2(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -339,6 +352,7 @@ static int __cmd_inject(struct perf_inject *inject) if (inject->build_ids || inject->sched_stat) { inject->tool.mmap = perf_event__repipe_mmap; + inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; inject->tool.tracing_data = perf_event__repipe_tracing_data; } @@ -390,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = perf_event__repipe_sample, .mmap = perf_event__repipe, + .mmap2 = perf_event__repipe, .comm = perf_event__repipe, .fork = perf_event__repipe, .exit = perf_event__repipe, diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 47b35407c2f2..935d52216c89 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1165,16 +1165,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_event_attr *attr = &pos->attr; /* make sure these *are* set */ - attr->sample_type |= PERF_SAMPLE_TID; - attr->sample_type |= PERF_SAMPLE_TIME; - attr->sample_type |= PERF_SAMPLE_CPU; - attr->sample_type |= PERF_SAMPLE_RAW; + perf_evsel__set_sample_bit(pos, TID); + perf_evsel__set_sample_bit(pos, TIME); + perf_evsel__set_sample_bit(pos, CPU); + perf_evsel__set_sample_bit(pos, RAW); /* make sure these are *not*; want as small a sample as possible */ - attr->sample_type &= ~PERF_SAMPLE_PERIOD; - attr->sample_type &= ~PERF_SAMPLE_IP; - attr->sample_type 
&= ~PERF_SAMPLE_CALLCHAIN; - attr->sample_type &= ~PERF_SAMPLE_ADDR; - attr->sample_type &= ~PERF_SAMPLE_READ; + perf_evsel__reset_sample_bit(pos, PERIOD); + perf_evsel__reset_sample_bit(pos, IP); + perf_evsel__reset_sample_bit(pos, CALLCHAIN); + perf_evsel__reset_sample_bit(pos, ADDR); + perf_evsel__reset_sample_bit(pos, READ); attr->mmap = 0; attr->comm = 0; attr->task = 0; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 791b432df847..253133a6251d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -190,6 +190,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9725aa375414..8e50d8d77419 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -744,6 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .tool = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 93a34cef9676..7f31a3ded1b6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -542,6 +542,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, static struct perf_tool perf_script = { .sample = process_sample_event, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b6f0725068bd..f5aa6375e3e9 100644 --- 
a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -100,7 +100,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(SHARED); P_MMAP_FLAG(PRIVATE); +#ifdef MAP_32BIT P_MMAP_FLAG(32BIT); +#endif P_MMAP_FLAG(ANONYMOUS); P_MMAP_FLAG(DENYWRITE); P_MMAP_FLAG(EXECUTABLE); @@ -994,6 +996,9 @@ again: handler = evsel->handler.func; handler(trace, evsel, &sample); + + if (done) + goto out_unmap_evlist; } } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8bbeba322df9..1e67437fb4ca 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -112,6 +112,10 @@ static struct test { .func = test__keep_tracking, }, { + .desc = "Test parsing with no sample_id_all bit set", + .func = test__parse_no_sample_id_all, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c new file mode 100644 index 000000000000..e117b6c6a248 --- /dev/null +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -0,0 +1,108 @@ +#include <sys/types.h> +#include <stddef.h> + +#include "tests.h" + +#include "event.h" +#include "evlist.h" +#include "header.h" +#include "util.h" + +static int process_event(struct perf_evlist **pevlist, union perf_event *event) +{ + struct perf_sample sample; + + if (event->header.type == PERF_RECORD_HEADER_ATTR) { + if (perf_event__process_attr(NULL, event, pevlist)) { + pr_debug("perf_event__process_attr failed\n"); + return -1; + } + return 0; + } + + if (event->header.type >= PERF_RECORD_USER_TYPE_START) + return -1; + + if (!*pevlist) + return -1; + + if (perf_evlist__parse_sample(*pevlist, event, &sample)) { + pr_debug("perf_evlist__parse_sample failed\n"); + return -1; + } + + return 0; +} + +static int process_events(union perf_event **events, size_t count) +{ + struct perf_evlist *evlist = NULL; + int err = 0; + size_t i; + + for (i = 0; i < count && !err; i++) + err = process_event(&evlist, 
events[i]); + + if (evlist) + perf_evlist__delete(evlist); + + return err; +} + +struct test_attr_event { + struct attr_event attr; + u64 id; +}; + +/** + * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set. + * + * This function tests parsing data produced on kernel's that do not support the + * sample_id_all bit. Without the sample_id_all bit, non-sample events (such as + * mmap events) do not have an id sample appended, and consequently logic + * designed to determine the id will not work. That case happens when there is + * more than one selected event, so this test processes three events: 2 + * attributes representing the selected events and one mmap event. + * + * Return: %0 on success, %-1 if the test fails. + */ +int test__parse_no_sample_id_all(void) +{ + int err; + + struct test_attr_event event1 = { + .attr = { + .header = { + .type = PERF_RECORD_HEADER_ATTR, + .size = sizeof(struct test_attr_event), + }, + }, + .id = 1, + }; + struct test_attr_event event2 = { + .attr = { + .header = { + .type = PERF_RECORD_HEADER_ATTR, + .size = sizeof(struct test_attr_event), + }, + }, + .id = 2, + }; + struct mmap_event event3 = { + .header = { + .type = PERF_RECORD_MMAP, + .size = sizeof(struct mmap_event), + }, + }; + union perf_event *events[] = { + (union perf_event *)&event1, + (union perf_event *)&event2, + (union perf_event *)&event3, + }; + + err = process_events(events, ARRAY_SIZE(events)); + if (err) + return -1; + + return 0; +} diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 72d8881873b0..b8a7056519ac 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -50,7 +50,7 @@ int test__PERF_RECORD(void) struct perf_sample sample; const char *cmd = "sleep"; const char *argv[] = { cmd, "1", NULL, }; - char *bname; + char *bname, *mmap_filename; u64 prev_time = 0; bool found_cmd_mmap = false, found_libc_mmap = false, @@ -212,6 +212,7 @@ int test__PERF_RECORD(void) if ((type == 
PERF_RECORD_COMM || type == PERF_RECORD_MMAP || + type == PERF_RECORD_MMAP2 || type == PERF_RECORD_FORK || type == PERF_RECORD_EXIT) && (pid_t)event->comm.pid != evlist->workload.pid) { @@ -220,7 +221,8 @@ int test__PERF_RECORD(void) } if ((type == PERF_RECORD_COMM || - type == PERF_RECORD_MMAP) && + type == PERF_RECORD_MMAP || + type == PERF_RECORD_MMAP2) && event->comm.pid != event->comm.tid) { pr_debug("%s with different pid/tid!\n", name); ++errs; @@ -236,7 +238,12 @@ int test__PERF_RECORD(void) case PERF_RECORD_EXIT: goto found_exit; case PERF_RECORD_MMAP: - bname = strrchr(event->mmap.filename, '/'); + mmap_filename = event->mmap.filename; + goto check_bname; + case PERF_RECORD_MMAP2: + mmap_filename = event->mmap2.filename; + check_bname: + bname = strrchr(mmap_filename, '/'); if (bname != NULL) { if (!found_cmd_mmap) found_cmd_mmap = !strcmp(bname + 1, cmd); @@ -245,7 +252,7 @@ int test__PERF_RECORD(void) if (!found_ld_mmap) found_ld_mmap = !strncmp(bname + 1, "ld", 2); } else if (!found_vdso_mmap) - found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]"); + found_vdso_mmap = !strcmp(mmap_filename, "[vdso]"); break; case PERF_RECORD_SAMPLE: diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c048b589998a..e0ac713857ba 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -39,5 +39,6 @@ int test__perf_time_to_tsc(void); int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); +int test__parse_no_sample_id_all(void); #endif /* TESTS_H */ diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5b4fb330f656..194e2f42ff5d 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -350,9 +350,9 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp, } static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, FILE *fp) + struct hists *hists, + char *bf, size_t bfsz, FILE *fp) { - char bf[512]; int ret; struct 
perf_hpp hpp = { .buf = bf, @@ -360,8 +360,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, }; bool color = !symbol_conf.field_sep; - if (size == 0 || size > sizeof(bf)) - size = hpp.size = sizeof(bf); + if (size == 0 || size > bfsz) + size = hpp.size = bfsz; ret = hist_entry__period_snprintf(&hpp, he, color); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); @@ -392,6 +392,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, .ptr = hists_to_evsel(hists), }; bool first = true; + size_t linesz; + char *line = NULL; init_rem_hits(); @@ -479,6 +481,13 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, goto out; print_entries: + linesz = hists__sort_list_width(hists) + 3 + 1; + line = malloc(linesz); + if (line == NULL) { + ret = -1; + goto out; + } + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent = h->stat.period * 100.0 / @@ -490,10 +499,10 @@ print_entries: if (percent < min_pcnt) continue; - ret += hist_entry__fprintf(h, max_cols, hists, fp); + ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp); if (max_rows && ++nr_rows >= max_rows) - goto out; + break; if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(&h->thread->mg, @@ -501,6 +510,8 @@ print_entries: fprintf(fp, "%.10s end\n", graph_dotted_line); } } + + free(line); out: free(rem_sq_bracket); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index fb584092eb88..7ded71d19d75 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -67,6 +67,7 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, struct perf_tool build_id__mark_dso_hit_ops = { .sample = build_id__mark_dso_hit, .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, .fork = perf_event__process_fork, .exit = perf_event__exit_del_thread, .attr = 
perf_event__process_attr, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8d51f21107aa..9b393e7dca6f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -11,6 +11,7 @@ static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", + [PERF_RECORD_MMAP2] = "MMAP2", [PERF_RECORD_LOST] = "LOST", [PERF_RECORD_COMM] = "COMM", [PERF_RECORD_EXIT] = "EXIT", @@ -186,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP; + event->header.type = PERF_RECORD_MMAP2; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -197,7 +198,9 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; + unsigned int ino; size_t size; + ssize_t n; if (fgets(bf, sizeof(bf), fp) == NULL) break; @@ -206,9 +209,16 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", - &event->mmap.start, &event->mmap.len, prot, - &event->mmap.pgoff, execname); + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + &event->mmap2.start, &event->mmap2.len, prot, + &event->mmap2.pgoff, &event->mmap2.maj, + &event->mmap2.min, + &ino, execname); + + event->mmap2.ino = (u64)ino; + + if (n != 8) + continue; if (prot[2] != 'x') continue; @@ -217,15 +227,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap.filename, execname, size); + memcpy(event->mmap2.filename, execname, size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap.len -= event->mmap.start; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, 
machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.pid = tgid; - event->mmap.tid = pid; + event->mmap2.len -= event->mmap.start; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + event->mmap2.header.size += machine->id_hdr_size; + event->mmap2.pid = tgid; + event->mmap2.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; @@ -527,6 +537,17 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) event->mmap.len, event->mmap.pgoff, event->mmap.filename); } +size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 + " %02x:%02x %"PRIu64" %"PRIu64"]: %s\n", + event->mmap2.pid, event->mmap2.tid, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + event->mmap2.filename); +} + int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -535,6 +556,14 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, return machine__process_mmap_event(machine, event); } +int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event); +} + size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) { return fprintf(fp, "(%d:%d):(%d:%d)\n", @@ -574,6 +603,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP: ret += perf_event__fprintf_mmap(event, fp); break; + case PERF_RECORD_MMAP2: + ret += perf_event__fprintf_mmap2(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h 
b/tools/perf/util/event.h index 93130d856bf0..c67ecc457d29 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -17,6 +17,19 @@ struct mmap_event { char filename[PATH_MAX]; }; +struct mmap2_event { + struct perf_event_header header; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; + char filename[PATH_MAX]; +}; + struct comm_event { struct perf_event_header header; u32 pid, tid; @@ -159,6 +172,7 @@ struct tracing_data_event { union perf_event { struct perf_event_header header; struct mmap_event mmap; + struct mmap2_event mmap2; struct comm_event comm; struct fork_event fork; struct lost_event lost; @@ -208,6 +222,10 @@ int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -238,6 +256,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b8727ae45e3b..f9f77bee0b1b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -64,6 +64,16 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist) evlist->is_pos = first->is_pos; } +static void perf_evlist__update_id_pos(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) + perf_evsel__calc_id_pos(evsel); + + 
perf_evlist__set_id_pos(evlist); +} + static void perf_evlist__purge(struct perf_evlist *evlist) { struct perf_evsel *pos, *n; @@ -446,20 +456,25 @@ static int perf_evlist__event2id(struct perf_evlist *evlist, static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event) { + struct perf_evsel *first = perf_evlist__first(evlist); struct hlist_head *head; struct perf_sample_id *sid; int hash; u64 id; if (evlist->nr_entries == 1) - return perf_evlist__first(evlist); + return first; + + if (!first->attr.sample_id_all && + event->header.type != PERF_RECORD_SAMPLE) + return first; if (perf_evlist__event2id(evlist, event, &id)) return NULL; /* Synthesized events have an id of zero */ if (!id) - return perf_evlist__first(evlist); + return first; hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; @@ -915,6 +930,8 @@ int perf_evlist__open(struct perf_evlist *evlist) struct perf_evsel *evsel; int err; + perf_evlist__update_id_pos(evlist); + list_for_each_entry(evsel, &evlist->entries, node) { err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); if (err < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3612183e2cc5..0ce9febf1ba0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -27,6 +27,7 @@ static struct { bool sample_id_all; bool exclude_guest; + bool mmap2; } perf_missing_features; #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) @@ -676,8 +677,9 @@ void perf_evsel__config(struct perf_evsel *evsel, if (opts->sample_weight) attr->sample_type |= PERF_SAMPLE_WEIGHT; - attr->mmap = track; - attr->comm = track; + attr->mmap = track; + attr->mmap2 = track && !perf_missing_features.mmap2; + attr->comm = track; /* * XXX see the function comment above @@ -1016,6 +1018,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.mmap2) + evsel->attr.mmap2 = 0; if 
(perf_missing_features.exclude_guest) evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; retry_sample_id: @@ -1080,8 +1084,11 @@ try_fallback: if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; - if (!perf_missing_features.exclude_guest && - (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { + if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { + perf_missing_features.mmap2 = true; + goto fallback_missing_features; + } else if (!perf_missing_features.exclude_guest && + (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { perf_missing_features.exclude_guest = true; goto fallback_missing_features; } else if (!perf_missing_features.sample_id_all) { @@ -1925,6 +1932,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, if_print(exclude_hv); if_print(exclude_idle); if_print(mmap); + if_print(mmap2); if_print(comm); if_print(freq); if_print(inherit_stat); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a33197a4fd21..26441d0e571b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1351,6 +1351,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); + fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2); + fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap); + fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data); if (evsel->ids) { fprintf(fp, ", id = {"); for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1dca61f0512d..933d14f287ca 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -997,6 +997,54 @@ out_problem: return -1; } +int machine__process_mmap2_event(struct machine *machine, + union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread; + struct map *map; + enum map_type type; + int ret = 0; + + if (dump_trace) + perf_event__fprintf_mmap2(event, 
stdout); + + if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + cpumode == PERF_RECORD_MISC_KERNEL) { + ret = machine__process_kernel_mmap_event(machine, event); + if (ret < 0) + goto out_problem; + return 0; + } + + thread = machine__findnew_thread(machine, event->mmap2.pid, + event->mmap2.pid); + if (thread == NULL) + goto out_problem; + + if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) + type = MAP__VARIABLE; + else + type = MAP__FUNCTION; + + map = map__new(&machine->user_dsos, event->mmap2.start, + event->mmap2.len, event->mmap2.pgoff, + event->mmap2.pid, event->mmap2.maj, + event->mmap2.min, event->mmap2.ino, + event->mmap2.ino_generation, + event->mmap2.filename, type); + + if (map == NULL) + goto out_problem; + + thread__insert_map(thread, map); + return 0; + +out_problem: + dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n"); + return 0; +} + int machine__process_mmap_event(struct machine *machine, union perf_event *event) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -1028,7 +1076,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(&machine->user_dsos, event->mmap.start, event->mmap.len, event->mmap.pgoff, - event->mmap.pid, event->mmap.filename, + event->mmap.pid, 0, 0, 0, 0, + event->mmap.filename, type); if (map == NULL) @@ -1101,6 +1150,8 @@ int machine__process_event(struct machine *machine, union perf_event *event) ret = machine__process_comm_event(machine, event); break; case PERF_RECORD_MMAP: ret = machine__process_mmap_event(machine, event); break; + case PERF_RECORD_MMAP2: + ret = machine__process_mmap2_event(machine, event); break; case PERF_RECORD_FORK: ret = machine__process_fork_event(machine, event); break; case PERF_RECORD_EXIT: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 0df925ba6a44..58a6be1fc739 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -45,6 +45,7 @@ int 
machine__process_exit_event(struct machine *machine, union perf_event *event int machine__process_fork_event(struct machine *machine, union perf_event *event); int machine__process_lost_event(struct machine *machine, union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event); +int machine__process_mmap2_event(struct machine *machine, union perf_event *event); int machine__process_event(struct machine *machine, union perf_event *event); typedef void (*machine__process_t)(struct machine *machine, void *data); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 9e8304ca343e..4f6680d2043b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -48,7 +48,8 @@ void map__init(struct map *map, enum map_type type, } struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, - u64 pgoff, u32 pid, char *filename, + u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 ino_gen, char *filename, enum map_type type) { struct map *map = malloc(sizeof(*map)); @@ -62,6 +63,11 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); + map->maj = d_maj; + map->min = d_min; + map->ino = ino; + map->ino_generation = ino_gen; + if (anon) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); filename = newfilename; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2cc93cbf0e17..4886ca280536 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -36,6 +36,9 @@ struct map { bool erange_warned; u32 priv; u64 pgoff; + u32 maj, min; /* only valid for MMAP2 record */ + u64 ino; /* only valid for MMAP2 record */ + u64 ino_generation;/* only valid for MMAP2 record */ /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); @@ -88,8 +91,9 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, 
struct dso *dso); struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, - u64 pgoff, u32 pid, char *filename, - enum map_type type); + u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 ino_gen, + char *filename, enum map_type type); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *map); struct map *map__clone(struct map *map); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1fc0c628683e..51f5edf2a6d0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -351,6 +351,25 @@ static void perf_event__mmap_swap(union perf_event *event, } } +static void perf_event__mmap2_swap(union perf_event *event, + bool sample_id_all) +{ + event->mmap2.pid = bswap_32(event->mmap2.pid); + event->mmap2.tid = bswap_32(event->mmap2.tid); + event->mmap2.start = bswap_64(event->mmap2.start); + event->mmap2.len = bswap_64(event->mmap2.len); + event->mmap2.pgoff = bswap_64(event->mmap2.pgoff); + event->mmap2.maj = bswap_32(event->mmap2.maj); + event->mmap2.min = bswap_32(event->mmap2.min); + event->mmap2.ino = bswap_64(event->mmap2.ino); + + if (sample_id_all) { + void *data = &event->mmap2.filename; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); @@ -455,6 +474,7 @@ typedef void (*perf_event__swap_op)(union perf_event *event, static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, + [PERF_RECORD_MMAP2] = perf_event__mmap2_swap, [PERF_RECORD_COMM] = perf_event__comm_swap, [PERF_RECORD_FORK] = perf_event__task_swap, [PERF_RECORD_EXIT] = perf_event__task_swap, @@ -504,6 +524,7 @@ static int flush_sample_queue(struct perf_session *s, u64 limit = os->next_flush; u64 last_ts = os->last_sample ? 
os->last_sample->timestamp : 0ULL; unsigned idx = 0, progress_next = os->nr_samples / 16; + bool show_progress = limit == ULLONG_MAX; int ret; if (!tool->ordered_samples || !limit) @@ -526,7 +547,7 @@ static int flush_sample_queue(struct perf_session *s, os->last_flush = iter->timestamp; list_del(&iter->list); list_add(&iter->list, &os->sample_cache); - if (++idx >= progress_next) { + if (show_progress && (++idx >= progress_next)) { progress_next += os->nr_samples / 16; ui_progress__update(idx, os->nr_samples, "Processing time ordered events..."); @@ -850,7 +871,8 @@ static struct machine * (cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; - if (event->header.type == PERF_RECORD_MMAP) + if (event->header.type == PERF_RECORD_MMAP + || event->header.type == PERF_RECORD_MMAP2) pid = event->mmap.pid; else pid = sample->pid; @@ -977,6 +999,8 @@ static int perf_session_deliver_event(struct perf_session *session, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); + case PERF_RECORD_MMAP2: + return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); case PERF_RECORD_FORK: @@ -1619,52 +1643,26 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, const struct perf_evsel_str_handler *assocs, size_t nr_assocs) { - struct perf_evlist *evlist = session->evlist; - struct event_format *format; struct perf_evsel *evsel; - char *tracepoint, *name; size_t i; int err; for (i = 0; i < nr_assocs; i++) { - err = -ENOMEM; - tracepoint = strdup(assocs[i].name); - if (tracepoint == NULL) - goto out; - - err = -ENOENT; - name = strchr(tracepoint, ':'); - if (name == NULL) - goto out_free; - - *name++ = '\0'; - format = pevent_find_event_by_name(session->pevent, - tracepoint, name); - if (format == NULL) { - /* - * Adding a handler for an event not in the session, - * just ignore it. 
- */ - goto next; - } - - evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id); + /* + * Adding a handler for an event not in the session, + * just ignore it. + */ + evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); if (evsel == NULL) - goto next; + continue; err = -EEXIST; if (evsel->handler.func != NULL) - goto out_free; + goto out; evsel->handler.func = assocs[i].handler; -next: - free(tracepoint); } err = 0; out: return err; - -out_free: - free(tracepoint); - goto out; } diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 62b16b6165ba..4385816d3d49 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -29,6 +29,7 @@ struct perf_tool { event_sample sample, read; event_op mmap, + mmap2, comm, fork, exit, diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index ea475cd03511..8a39dda7a325 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) /* work was canceled */ + if (!work->done) { /* work was canceled */ + mmdrop(work->mm); + kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); + } } spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf040c4e02b3..979bff485fb0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) EXPORT_SYMBOL_GPL(gfn_to_hva); /* - * The hva returned by this function is only allowed to be read. - * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + * If writable is set to false, the hva returned by this function is only + * allowed to be read. 
*/ -static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) { + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + if (writable) + *writable = !memslot_is_readonly(slot); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } @@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; r = kvm_read_hva(data, (void __user *)addr + offset, len); @@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva_read(kvm, gfn); + addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); |