From 9c44bc03fff44ff04237a7d92e35304a0e50c331 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:04 +0200 Subject: softlockup: allow panic on lockup allow users to configure the softlockup detector to generate a panic instead of a warning message. high-availability systems might opt for this strict method (combined with panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. it's default-disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432c731f..042588fa12e5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1971,6 +1971,9 @@ and is between 256 and 4096 characters. It is defined in the file snd-ymfpci= [HW,ALSA] + softlockup_panic= + [KNL] Should the soft-lockup detector generate panics. + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/sonypi.txt -- cgit v1.2.3 From 2dff41775decb9cb1f3a6ac577c78dc3eea60431 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 11 Jul 2008 17:31:15 -0500 Subject: powerpc: Document Freescale power management nodes, and the sleep property. Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- Documentation/powerpc/booting-without-of.txt | 142 +++++++++++++++++-------- Documentation/powerpc/dts-bindings/fsl/pmc.txt | 63 +++++++++++ 2 files changed, 163 insertions(+), 42 deletions(-) create mode 100644 Documentation/powerpc/dts-bindings/fsl/pmc.txt (limited to 'Documentation') diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index de2e5c05d6e7..07ae2edff7c4 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -77,10 +77,12 @@ Table of Contents 3) OpenPIC Interrupt Controllers 4) ISA Interrupt Controllers - VIII - Specifying GPIO information for devices + IX - Specifying GPIO information for devices 1) gpios property 2) gpio-controller nodes + X - Specifying device power management information (sleep property) + Appendix A - Sample SOC node for MPC8540 @@ -2422,8 +2424,8 @@ encodings listed below: 2 = high to low edge sensitive type enabled 3 = low to high edge sensitive type enabled -VIII - Specifying GPIO information for devices -============================================== +IX - Specifying GPIO information for devices +============================================ 1) gpios property ----------------- @@ -2471,6 +2473,37 @@ Example of two SOC GPIO banks defined as gpio-controller nodes: gpio-controller; }; +X - Specifying Device Power Management Information (sleep property) +=================================================================== + +Devices on SOCs often have mechanisms for placing devices into low-power +states that are decoupled from the devices' own register blocks. Sometimes, +this information is more complicated than a cell-index property can +reasonably describe. Thus, each device controlled in such a manner +may contain a "sleep" property which describes these connections. + +The sleep property consists of one or more sleep resources, each of +which consists of a phandle to a sleep controller, followed by a +controller-specific sleep specifier of zero or more cells. + +The semantics of what type of low power modes are possible are defined +by the sleep controller. Some examples of the types of low power modes +that may be supported are: + + - Dynamic: The device may be disabled or enabled at any time. + - System Suspend: The device may request to be disabled or remain + awake during system suspend, but will not be disabled until then. + - Permanent: The device is disabled permanently (until the next hard + reset). + +Some devices may share a clock domain with each other, such that they should +only be suspended when none of the devices are in use. Where reasonable, +such nodes should be placed on a virtual bus, where the bus has the sleep +property. If the clock domain is shared among devices that cannot be +reasonably grouped in this manner, then create a virtual sleep controller +(similar to an interrupt nexus, except that defining a standardized +sleep-map should wait until its necessity is demonstrated). + Appendix A - Sample SOC node for MPC8540 ======================================== @@ -2487,47 +2520,48 @@ not necessary as they are usually the same as the root node. reg = ; bus-frequency = <0>; - mdio@24520 { - reg = <24520 20>; - device_type = "mdio"; - compatible = "gianfar"; - - ethernet-phy@0 { - linux,phandle = <2452000> - interrupt-parent = <40000>; - interrupts = <35 1>; - reg = <0>; - device_type = "ethernet-phy"; - }; - - ethernet-phy@1 { - linux,phandle = <2452001> - interrupt-parent = <40000>; - interrupts = <35 1>; - reg = <1>; - device_type = "ethernet-phy"; - }; - - ethernet-phy@3 { - linux,phandle = <2452002> - interrupt-parent = <40000>; - interrupts = <35 1>; - reg = <3>; - device_type = "ethernet-phy"; - }; - - }; - ethernet@24000 { - #size-cells = <0>; + #address-cells = <1>; + #size-cells = <1>; device_type = "network"; model = "TSEC"; - compatible = "gianfar"; + compatible = "gianfar", "simple-bus"; reg = <24000 1000>; mac-address = [ 00 E0 0C 00 73 00 ]; interrupts = ; interrupt-parent = <40000>; phy-handle = <2452000>; + sleep = <&pmc 00000080>; + ranges; + + mdio@24520 { + reg = <24520 20>; + compatible = "fsl,gianfar-mdio"; + + ethernet-phy@0 { + linux,phandle = <2452000> + interrupt-parent = <40000>; + interrupts = <35 1>; + reg = <0>; + device_type = "ethernet-phy"; + }; + + ethernet-phy@1 { + linux,phandle = <2452001> + interrupt-parent = <40000>; + interrupts = <35 1>; + reg = <1>; + device_type = "ethernet-phy"; + }; + + ethernet-phy@3 { + linux,phandle = <2452002> + interrupt-parent = <40000>; + interrupts = <35 1>; + reg = <3>; + device_type = "ethernet-phy"; + }; + }; }; ethernet@25000 { @@ -2541,6 +2575,7 @@ not necessary as they are usually the same as the root node. interrupts = <13 3 14 3 18 3>; interrupt-parent = <40000>; phy-handle = <2452001>; + sleep = <&pmc 00000040>; }; ethernet@26000 { @@ -2554,15 +2589,33 @@ not necessary as they are usually the same as the root node. interrupts = <19 3>; interrupt-parent = <40000>; phy-handle = <2452002>; + sleep = <&pmc 00000020>; }; serial@4500 { - device_type = "serial"; - compatible = "ns16550"; - reg = <4500 100>; - clock-frequency = <0>; - interrupts = <1a 3>; - interrupt-parent = <40000>; + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,mpc8540-duart", "simple-bus"; + sleep = <&pmc 00000002>; + ranges; + + serial@4500 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4500 100>; + clock-frequency = <0>; + interrupts = <1a 3>; + interrupt-parent = <40000>; + }; + + serial@4600 { + device_type = "serial"; + compatible = "ns16550"; + reg = <4600 100>; + clock-frequency = <0>; + interrupts = <1a 3>; + interrupt-parent = <40000>; + }; }; pic@40000 { @@ -2581,6 +2634,11 @@ not necessary as they are usually the same as the root node. device_type = "i2c"; compatible = "fsl-i2c"; dfsrr; + sleep = <&pmc 00000004>; }; + pmc: power@e0070 { + compatible = "fsl,mpc8540-pmc", "fsl,mpc8548-pmc"; + reg = ; + }; }; diff --git a/Documentation/powerpc/dts-bindings/fsl/pmc.txt b/Documentation/powerpc/dts-bindings/fsl/pmc.txt new file mode 100644 index 000000000000..02f6f43ee1b7 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/pmc.txt @@ -0,0 +1,63 @@ +* Power Management Controller + +Properties: +- compatible: "fsl,-pmc". + + "fsl,mpc8349-pmc" should be listed for any chip whose PMC is + compatible. "fsl,mpc8313-pmc" should also be listed for any chip + whose PMC is compatible, and implies deep-sleep capability. + + "fsl,mpc8548-pmc" should be listed for any chip whose PMC is + compatible. "fsl,mpc8536-pmc" should also be listed for any chip + whose PMC is compatible, and implies deep-sleep capability. + + "fsl,mpc8641d-pmc" should be listed for any chip whose PMC is + compatible; all statements below that apply to "fsl,mpc8548-pmc" also + apply to "fsl,mpc8641d-pmc". + + Compatibility does not include bit assigments in SCCR/PMCDR/DEVDISR; these + bit assigments are indicated via the sleep specifier in each device's + sleep property. + +- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource + is the PMC block, and the second resource is the Clock Configuration + block. + + For devices compatible with "fsl,mpc8548-pmc", the first resource + is a 32-byte block beginning with DEVDISR. + +- interrupts: For "fsl,mpc8349-pmc"-compatible devices, the first + resource is the PMC block interrupt. + +- fsl,mpc8313-wakeup-timer: For "fsl,mpc8313-pmc"-compatible devices, + this is a phandle to an "fsl,gtm" node on which timer 4 can be used as + a wakeup source from deep sleep. + +Sleep specifiers: + + fsl,mpc8349-pmc: Sleep specifiers consist of one cell. For each bit + that is set in the cell, the corresponding bit in SCCR will be saved + and cleared on suspend, and restored on resume. This sleep controller + supports disabling and resuming devices at any time. + + fsl,mpc8536-pmc: Sleep specifiers consist of three cells, the third of + which will be ORed into PMCDR upon suspend, and cleared from PMCDR + upon resume. The first two cells are as described for fsl,mpc8578-pmc. + This sleep controller only supports disabling devices during system + sleep, or permanently. + + fsl,mpc8548-pmc: Sleep specifiers consist of one or two cells, the + first of which will be ORed into DEVDISR (and the second into + DEVDISR2, if present -- this cell should be zero or absent if the + hardware does not have DEVDISR2) upon a request for permanent device + disabling. This sleep controller does not support configuring devices + to disable during system sleep (unless supported by another compatible + match), or dynamically. + +Example: + + power@b00 { + compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc"; + reg = <0xb00 0x100 0xa00 0x100>; + interrupts = <80 8>; + }; -- cgit v1.2.3 From 7e72063c9aaeb618815589cd4d57f26186e6fcad Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 25 Jun 2008 12:07:39 -0500 Subject: powerpc: Update example SOC node in booting-without-of.txt. Convert to DTS version 1, eliminate some obsolete practices, and correct some errors (compared to the actual 8540 device tree). Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- Documentation/powerpc/booting-without-of.txt | 87 +++++++++++----------------- 1 file changed, 33 insertions(+), 54 deletions(-) (limited to 'Documentation') diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 07ae2edff7c4..ddafbf2fb1f3 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -2507,18 +2507,14 @@ sleep-map should wait until its necessity is demonstrated). Appendix A - Sample SOC node for MPC8540 ======================================== -Note that the #address-cells and #size-cells for the SoC node -in this example have been explicitly listed; these are likely -not necessary as they are usually the same as the root node. - - soc8540@e0000000 { + soc@e0000000 { #address-cells = <1>; #size-cells = <1>; - #interrupt-cells = <2>; + compatible = "fsl,mpc8540-ccsr", "simple-bus"; device_type = "soc"; - ranges = <00000000 e0000000 00100000> - reg = ; + ranges = <0x00000000 0xe0000000 0x00100000> bus-frequency = <0>; + interrupt-parent = <&pic>; ethernet@24000 { #address-cells = <1>; @@ -2526,38 +2522,31 @@ not necessary as they are usually the same as the root node. device_type = "network"; model = "TSEC"; compatible = "gianfar", "simple-bus"; - reg = <24000 1000>; - mac-address = [ 00 E0 0C 00 73 00 ]; - interrupts = ; - interrupt-parent = <40000>; - phy-handle = <2452000>; + reg = <0x24000 0x1000>; + local-mac-address = [ 00 E0 0C 00 73 00 ]; + interrupts = <29 2 30 2 34 2>; + phy-handle = <&phy0>; sleep = <&pmc 00000080>; ranges; mdio@24520 { - reg = <24520 20>; + reg = <0x24520 0x20>; compatible = "fsl,gianfar-mdio"; - ethernet-phy@0 { - linux,phandle = <2452000> - interrupt-parent = <40000>; - interrupts = <35 1>; + phy0: ethernet-phy@0 { + interrupts = <5 1>; reg = <0>; device_type = "ethernet-phy"; }; - ethernet-phy@1 { - linux,phandle = <2452001> - interrupt-parent = <40000>; - interrupts = <35 1>; + phy1: ethernet-phy@1 { + interrupts = <5 1>; reg = <1>; device_type = "ethernet-phy"; }; - ethernet-phy@3 { - linux,phandle = <2452002> - interrupt-parent = <40000>; - interrupts = <35 1>; + phy3: ethernet-phy@3 { + interrupts = <7 1>; reg = <3>; device_type = "ethernet-phy"; }; @@ -2565,30 +2554,24 @@ not necessary as they are usually the same as the root node. }; ethernet@25000 { - #address-cells = <1>; - #size-cells = <0>; device_type = "network"; model = "TSEC"; compatible = "gianfar"; - reg = <25000 1000>; - mac-address = [ 00 E0 0C 00 73 01 ]; - interrupts = <13 3 14 3 18 3>; - interrupt-parent = <40000>; - phy-handle = <2452001>; + reg = <0x25000 0x1000>; + local-mac-address = [ 00 E0 0C 00 73 01 ]; + interrupts = <13 2 14 2 18 2>; + phy-handle = <&phy1>; sleep = <&pmc 00000040>; }; ethernet@26000 { - #address-cells = <1>; - #size-cells = <0>; device_type = "network"; model = "FEC"; compatible = "gianfar"; - reg = <26000 1000>; - mac-address = [ 00 E0 0C 00 73 02 ]; - interrupts = <19 3>; - interrupt-parent = <40000>; - phy-handle = <2452002>; + reg = <0x26000 0x1000>; + local-mac-address = [ 00 E0 0C 00 73 02 ]; + interrupts = <41 2>; + phy-handle = <&phy3>; sleep = <&pmc 00000020>; }; @@ -2602,36 +2585,32 @@ not necessary as they are usually the same as the root node. serial@4500 { device_type = "serial"; compatible = "ns16550"; - reg = <4500 100>; + reg = <0x4500 0x100>; clock-frequency = <0>; - interrupts = <1a 3>; - interrupt-parent = <40000>; + interrupts = <42 2>; }; serial@4600 { device_type = "serial"; compatible = "ns16550"; - reg = <4600 100>; + reg = <0x4600 0x100>; clock-frequency = <0>; - interrupts = <1a 3>; - interrupt-parent = <40000>; + interrupts = <42 2>; }; }; - pic@40000 { - linux,phandle = <40000>; + pic: pic@40000 { interrupt-controller; #address-cells = <0>; - reg = <40000 40000>; + #interrupt-cells = <2>; + reg = <0x40000 0x40000>; compatible = "chrp,open-pic"; device_type = "open-pic"; }; i2c@3000 { - interrupt-parent = <40000>; - interrupts = <1b 3>; - reg = <3000 18>; - device_type = "i2c"; + interrupts = <43 2>; + reg = <0x3000 0x100>; compatible = "fsl-i2c"; dfsrr; sleep = <&pmc 00000004>; @@ -2639,6 +2618,6 @@ not necessary as they are usually the same as the root node. pmc: power@e0070 { compatible = "fsl,mpc8540-pmc", "fsl,mpc8548-pmc"; - reg = ; + reg = <0xe0070 0x20>; }; }; -- cgit v1.2.3 From 89ae5b2b9357cd715ab25df50e5fa56ae30aaf80 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 4 Jul 2008 20:53:28 +0400 Subject: powerpc: add FHCI USB, FSL MCU, FSL UPM and GPIO LEDs bindings This patch adds few bindings for the new drivers to be submitted through the appropriate maintainers. Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- .../powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt | 53 ++++++++++++++-------- .../powerpc/dts-bindings/fsl/mcu-mpc8349emitx.txt | 17 +++++++ .../powerpc/dts-bindings/fsl/upm-nand.txt | 28 ++++++++++++ Documentation/powerpc/dts-bindings/gpio/led.txt | 15 ++++++ 4 files changed, 94 insertions(+), 19 deletions(-) create mode 100644 Documentation/powerpc/dts-bindings/fsl/mcu-mpc8349emitx.txt create mode 100644 Documentation/powerpc/dts-bindings/fsl/upm-nand.txt create mode 100644 Documentation/powerpc/dts-bindings/gpio/led.txt (limited to 'Documentation') diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt index c8f44d6bcbcf..9ccd5f30405b 100644 --- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/usb.txt @@ -1,22 +1,37 @@ -* USB (Universal Serial Bus Controller) +Freescale QUICC Engine USB Controller Required properties: -- compatible : could be "qe_udc" or "fhci-hcd". -- mode : the could be "host" or "slave". -- reg : Offset and length of the register set for the device -- interrupts : where a is the interrupt number and b is a - field that represents an encoding of the sense and level - information for the interrupt. This should be encoded based on - the information in section 2) depending on the type of interrupt - controller you have. -- interrupt-parent : the phandle for the interrupt controller that - services interrupts for this device. +- compatible : should be "fsl,-qe-usb", "fsl,mpc8323-qe-usb". +- reg : the first two cells should contain usb registers location and + length, the next two two cells should contain PRAM location and + length. +- interrupts : should contain USB interrupt. +- interrupt-parent : interrupt source phandle. +- fsl,fullspeed-clock : specifies the full speed USB clock source: + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +- fsl,lowspeed-clock : specifies the low speed USB clock source: + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +- hub-power-budget : USB power budget for the root hub, in mA. +- gpios : should specify GPIOs in this order: USBOE, USBTP, USBTN, USBRP, + USBRN, SPEED (optional), and POWER (optional). -Example(slave): - usb@6c0 { - compatible = "qe_udc"; - reg = <6c0 40>; - interrupts = <8b 0>; - interrupt-parent = <700>; - mode = "slave"; - }; +Example: + +usb@6c0 { + compatible = "fsl,mpc8360-qe-usb", "fsl,mpc8323-qe-usb"; + reg = <0x6c0 0x40 0x8b00 0x100>; + interrupts = <11>; + interrupt-parent = <&qeic>; + fsl,fullspeed-clock = "clk21"; + gpios = <&qe_pio_b 2 0 /* USBOE */ + &qe_pio_b 3 0 /* USBTP */ + &qe_pio_b 8 0 /* USBTN */ + &qe_pio_b 9 0 /* USBRP */ + &qe_pio_b 11 0 /* USBRN */ + &qe_pio_e 20 0 /* SPEED */ + &qe_pio_e 21 0 /* POWER */>; +}; diff --git a/Documentation/powerpc/dts-bindings/fsl/mcu-mpc8349emitx.txt b/Documentation/powerpc/dts-bindings/fsl/mcu-mpc8349emitx.txt new file mode 100644 index 000000000000..0f766333b6eb --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/mcu-mpc8349emitx.txt @@ -0,0 +1,17 @@ +Freescale MPC8349E-mITX-compatible Power Management Micro Controller Unit (MCU) + +Required properties: +- compatible : "fsl,-", "fsl,mcu-mpc8349emitx". +- reg : should specify I2C address (0x0a). +- #gpio-cells : should be 2. +- gpio-controller : should be present. + +Example: + +mcu@0a { + #gpio-cells = <2>; + compatible = "fsl,mc9s08qg8-mpc8349emitx", + "fsl,mcu-mpc8349emitx"; + reg = <0x0a>; + gpio-controller; +}; diff --git a/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt new file mode 100644 index 000000000000..84a04d5eb8e6 --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt @@ -0,0 +1,28 @@ +Freescale Localbus UPM programmed to work with NAND flash + +Required properties: +- compatible : "fsl,upm-nand". +- reg : should specify localbus chip select and size used for the chip. +- fsl,upm-addr-offset : UPM pattern offset for the address latch. +- fsl,upm-cmd-offset : UPM pattern offset for the command latch. +- gpios : may specify optional GPIO connected to the Ready-Not-Busy pin. + +Example: + +upm@1,0 { + compatible = "fsl,upm-nand"; + reg = <1 0 1>; + fsl,upm-addr-offset = <16>; + fsl,upm-cmd-offset = <8>; + gpios = <&qe_pio_e 18 0>; + + flash { + #address-cells = <1>; + #size-cells = <1>; + compatible = "..."; + + partition@0 { + ... + }; + }; +}; diff --git a/Documentation/powerpc/dts-bindings/gpio/led.txt b/Documentation/powerpc/dts-bindings/gpio/led.txt new file mode 100644 index 000000000000..ff51f4c0fa9d --- /dev/null +++ b/Documentation/powerpc/dts-bindings/gpio/led.txt @@ -0,0 +1,15 @@ +LED connected to GPIO + +Required properties: +- compatible : should be "gpio-led". +- label : (optional) the label for this LED. If omitted, the label is + taken from the node name (excluding the unit address). +- gpios : should specify LED GPIO. + +Example: + +led@0 { + compatible = "gpio-led"; + label = "hdd"; + gpios = <&mcu_pio 0 1>; +}; -- cgit v1.2.3 From ade254d7f05cb74ab0a951ab105bc9ae872f045e Mon Sep 17 00:00:00 2001 From: Jochen Friedrich Date: Sat, 5 Jul 2008 13:29:28 +0200 Subject: powerpc: Add documentation for CPM GPIO banks Signed-off-by: Jochen Friedrich Signed-off-by: Kumar Gala --- .../powerpc/dts-bindings/fsl/cpm_qe/gpio.txt | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt (limited to 'Documentation') diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt new file mode 100644 index 000000000000..1815dfede1bc --- /dev/null +++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt @@ -0,0 +1,38 @@ +Every GPIO controller node must have #gpio-cells property defined, +this information will be used to translate gpio-specifiers. + +On CPM1 devices, all ports are using slightly different register layouts. +Ports A, C and D are 16bit ports and Ports B and E are 32bit ports. + +On CPM2 devices, all ports are 32bit ports and use a common register layout. + +Required properties: +- compatible : "fsl,cpm1-pario-bank-a", "fsl,cpm1-pario-bank-b", + "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", + "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" +- #gpio-cells : Should be two. The first cell is the pin number and the + second cell is used to specify optional paramters (currently unused). +- gpio-controller : Marks the port as GPIO controller. + +Example of three SOC GPIO banks defined as gpio-controller nodes: + + CPM1_PIO_A: gpio-controller@950 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-a"; + reg = <0x950 0x10>; + gpio-controller; + }; + + CPM1_PIO_B: gpio-controller@ab8 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-b"; + reg = <0xab8 0x10>; + gpio-controller; + }; + + CPM1_PIO_E: gpio-controller@ac8 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-e"; + reg = <0xac8 0x18>; + gpio-controller; + }; -- cgit v1.2.3 From 00262986ceeb5c3358b70491aa898906503a0fe7 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 11 Jul 2008 18:04:43 -0500 Subject: Add fsl,magic-packet to, and clean up, the gianfar binding. Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- Documentation/powerpc/dts-bindings/fsl/tsec.txt | 31 ++++++++++--------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'Documentation') diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt index 583ef6b56c43..cf55fa4112d2 100644 --- a/Documentation/powerpc/dts-bindings/fsl/tsec.txt +++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt @@ -24,46 +24,39 @@ Example: * Gianfar-compatible ethernet nodes -Required properties: +Properties: - device_type : Should be "network" - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC" - compatible : Should be "gianfar" - reg : Offset and length of the register set for the device - - mac-address : List of bytes representing the ethernet address of + - local-mac-address : List of bytes representing the ethernet address of this controller - - interrupts : where a is the interrupt number and b is a - field that represents an encoding of the sense and level - information for the interrupt. This should be encoded based on - the information in section 2) depending on the type of interrupt - controller you have. - - interrupt-parent : the phandle for the interrupt controller that - services interrupts for this device. + - interrupts : For FEC devices, the first interrupt is the device's + interrupt. For TSEC and eTSEC devices, the first interrupt is + transmit, the second is receive, and the third is error. - phy-handle : The phandle for the PHY connected to this ethernet controller. - fixed-link : where a is emulated phy id - choose any, but unique to the all specified fixed-links, b is duplex - 0 half, 1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause. - -Recommended properties: - - phy-connection-type : a string naming the controller/PHY interface type, i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", "tbi", or "rtbi". This property is only really needed if the connection is of type "rgmii-id", as all other connection types are detected by hardware. - + - fsl,magic-packet : If present, indicates that the hardware supports + waking up via magic packet. Example: ethernet@24000 { - #size-cells = <0>; device_type = "network"; model = "TSEC"; compatible = "gianfar"; - reg = <24000 1000>; - mac-address = [ 00 E0 0C 00 73 00 ]; - interrupts = ; - interrupt-parent = <40000>; - phy-handle = <2452000> + reg = <0x24000 0x1000>; + local-mac-address = [ 00 E0 0C 00 73 00 ]; + interrupts = <29 2 30 2 34 2>; + interrupt-parent = <&mpic>; + phy-handle = <&phy0> }; -- cgit v1.2.3 From 0e74dc2646db04b644faa8ea10ff4f408d55cf90 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 21 Jul 2008 09:15:51 -0300 Subject: ACPI: thinkpad-acpi: add bluetooth and WWAN rfkill support Add a read/write rfkill interface to the bluetooth radio switch on the bluetooth submodule, and one for the wireless wan radio switch to the wan submodule. Since rfkill does care for when a switch changes state, use WLSW notifications to also check if the WWAN or Bluetooth switches did not change state (due to them being slaves of WLSW in firmware/hardware, but that reality not being always properly exported by the thinkpad firmware). Signed-off-by: Henrique de Moraes Holschuh Cc: Ivo van Doorn Cc: John W. Linville --- Documentation/laptops/thinkpad-acpi.txt | 22 +++- drivers/misc/Kconfig | 2 + drivers/misc/thinkpad_acpi.c | 208 ++++++++++++++++++++++++++++---- 3 files changed, 200 insertions(+), 32 deletions(-) (limited to 'Documentation') diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 64b3f146e4b0..1c1c0217ebd1 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -621,7 +621,8 @@ Bluetooth --------- procfs: /proc/acpi/ibm/bluetooth -sysfs device attribute: bluetooth_enable +sysfs device attribute: bluetooth_enable (deprecated) +sysfs rfkill class: switch "tpacpi_bluetooth_sw" This feature shows the presence and current state of a ThinkPad Bluetooth device in the internal ThinkPad CDC slot. @@ -643,8 +644,12 @@ Sysfs notes: 0: disables Bluetooth / Bluetooth is disabled 1: enables Bluetooth / Bluetooth is enabled. - Note: this interface will be probably be superseded by the - generic rfkill class, so it is NOT to be considered stable yet. + Note: this interface has been superseded by the generic rfkill + class. It has been deprecated, and it will be removed in year + 2010. + + rfkill controller switch "tpacpi_bluetooth_sw": refer to + Documentation/rfkill.txt for details. Video output control -- /proc/acpi/ibm/video -------------------------------------------- @@ -1374,7 +1379,8 @@ EXPERIMENTAL: WAN ----------------- procfs: /proc/acpi/ibm/wan -sysfs device attribute: wwan_enable +sysfs device attribute: wwan_enable (deprecated) +sysfs rfkill class: switch "tpacpi_wwan_sw" This feature is marked EXPERIMENTAL because the implementation directly accesses hardware registers and may not work as expected. USE @@ -1404,8 +1410,12 @@ Sysfs notes: 0: disables WWAN card / WWAN card is disabled 1: enables WWAN card / WWAN card is enabled. - Note: this interface will be probably be superseded by the - generic rfkill class, so it is NOT to be considered stable yet. + Note: this interface has been superseded by the generic rfkill + class. It has been deprecated, and it will be removed in year + 2010. + + rfkill controller switch "tpacpi_wwan_sw": refer to + Documentation/rfkill.txt for details. Multiple Commands, Module Parameters ------------------------------------ diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 1921b8dbb242..b27ca91fd15e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -279,6 +279,8 @@ config THINKPAD_ACPI select INPUT select NEW_LEDS select LEDS_CLASS + select NET + select RFKILL ---help--- This is a driver for the IBM and Lenovo ThinkPad laptops. It adds support for Fn-Fx key combinations, Bluetooth control, video diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c index 202d63e1b391..dc8d00a45701 100644 --- a/drivers/misc/thinkpad_acpi.c +++ b/drivers/misc/thinkpad_acpi.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,12 @@ enum { #define TPACPI_MAX_ACPI_ARGS 3 +/* rfkill switches */ +enum { + TPACPI_RFK_BLUETOOTH_SW_ID = 0, + TPACPI_RFK_WWAN_SW_ID, +}; + /* Debugging */ #define TPACPI_LOG TPACPI_FILE ": " #define TPACPI_ERR KERN_ERR TPACPI_LOG @@ -905,6 +912,43 @@ static int __init tpacpi_check_std_acpi_brightness_support(void) return 0; } +static int __init tpacpi_new_rfkill(const unsigned int id, + struct rfkill **rfk, + const enum rfkill_type rfktype, + const char *name, + int (*toggle_radio)(void *, enum rfkill_state), + int (*get_state)(void *, enum rfkill_state *)) +{ + int res; + enum rfkill_state initial_state; + + *rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype); + if (!*rfk) { + printk(TPACPI_ERR + "failed to allocate memory for rfkill class\n"); + return -ENOMEM; + } + + (*rfk)->name = name; + (*rfk)->get_state = get_state; + (*rfk)->toggle_radio = toggle_radio; + + if (!get_state(NULL, &initial_state)) + (*rfk)->state = initial_state; + + res = rfkill_register(*rfk); + if (res < 0) { + printk(TPACPI_ERR + "failed to register %s rfkill switch: %d\n", + name, res); + rfkill_free(*rfk); + *rfk = NULL; + return res; + } + + return 0; +} + /************************************************************************* * thinkpad-acpi driver attributes */ @@ -1906,10 +1950,18 @@ static struct attribute *hotkey_mask_attributes[] __initdata = { &dev_attr_hotkey_wakeup_hotunplug_complete.attr, }; +static void bluetooth_update_rfk(void); +static void wan_update_rfk(void); static void tpacpi_send_radiosw_update(void) { int wlsw; + /* Sync these BEFORE sending any rfkill events */ + if (tp_features.bluetooth) + bluetooth_update_rfk(); + if (tp_features.wan) + wan_update_rfk(); + if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) { mutex_lock(&tpacpi_inputdev_send_mutex); @@ -2581,6 +2633,8 @@ enum { TP_ACPI_BLUETOOTH_UNK = 0x04, /* unknown function */ }; +static struct rfkill *tpacpi_bluetooth_rfkill; + static int bluetooth_get_radiosw(void) { int status; @@ -2590,15 +2644,29 @@ static int bluetooth_get_radiosw(void) /* WLSW overrides bluetooth in firmware/hardware, reflect that */ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) - return 0; + return RFKILL_STATE_HARD_BLOCKED; if (!acpi_evalf(hkey_handle, &status, "GBDC", "d")) return -EIO; - return (status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0; + return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0) ? + RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; } -static int bluetooth_set_radiosw(int radio_on) +static void bluetooth_update_rfk(void) +{ + int status; + + if (!tpacpi_bluetooth_rfkill) + return; + + status = bluetooth_get_radiosw(); + if (status < 0) + return; + rfkill_force_state(tpacpi_bluetooth_rfkill, status); +} + +static int bluetooth_set_radiosw(int radio_on, int update_rfk) { int status; @@ -2620,6 +2688,9 @@ static int bluetooth_set_radiosw(int radio_on) if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status)) return -EIO; + if (update_rfk) + bluetooth_update_rfk(); + return 0; } @@ -2634,7 +2705,8 @@ static ssize_t bluetooth_enable_show(struct device *dev, if (status < 0) return status; - return snprintf(buf, PAGE_SIZE, "%d\n", status ? 1 : 0); + return snprintf(buf, PAGE_SIZE, "%d\n", + (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0); } static ssize_t bluetooth_enable_store(struct device *dev, @@ -2647,7 +2719,7 @@ static ssize_t bluetooth_enable_store(struct device *dev, if (parse_strtoul(buf, 1, &t)) return -EINVAL; - res = bluetooth_set_radiosw(t); + res = bluetooth_set_radiosw(t, 1); return (res) ? res : count; } @@ -2667,8 +2739,27 @@ static const struct attribute_group bluetooth_attr_group = { .attrs = bluetooth_attributes, }; +static int tpacpi_bluetooth_rfk_get(void *data, enum rfkill_state *state) +{ + int bts = bluetooth_get_radiosw(); + + if (bts < 0) + return bts; + + *state = bts; + return 0; +} + +static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state) +{ + return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); +} + static void bluetooth_exit(void) { + if (tpacpi_bluetooth_rfkill) + rfkill_unregister(tpacpi_bluetooth_rfkill); + sysfs_remove_group(&tpacpi_pdev->dev.kobj, &bluetooth_attr_group); } @@ -2699,14 +2790,26 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) "bluetooth hardware not installed\n"); } - if (tp_features.bluetooth) { - res = sysfs_create_group(&tpacpi_pdev->dev.kobj, + if (!tp_features.bluetooth) + return 1; + + res = sysfs_create_group(&tpacpi_pdev->dev.kobj, &bluetooth_attr_group); - if (res) - return res; + if (res) + return res; + + res = tpacpi_new_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID, + &tpacpi_bluetooth_rfkill, + RFKILL_TYPE_BLUETOOTH, + "tpacpi_bluetooth_sw", + tpacpi_bluetooth_rfk_set, + tpacpi_bluetooth_rfk_get); + if (res) { + bluetooth_exit(); + return res; } - return (tp_features.bluetooth)? 0 : 1; + return 0; } /* procfs -------------------------------------------------------------- */ @@ -2719,7 +2822,8 @@ static int bluetooth_read(char *p) len += sprintf(p + len, "status:\t\tnot supported\n"); else { len += sprintf(p + len, "status:\t\t%s\n", - (status)? "enabled" : "disabled"); + (status == RFKILL_STATE_UNBLOCKED) ? + "enabled" : "disabled"); len += sprintf(p + len, "commands:\tenable, disable\n"); } @@ -2735,9 +2839,9 @@ static int bluetooth_write(char *buf) while ((cmd = next_cmd(&buf))) { if (strlencmp(cmd, "enable") == 0) { - bluetooth_set_radiosw(1); + bluetooth_set_radiosw(1, 1); } else if (strlencmp(cmd, "disable") == 0) { - bluetooth_set_radiosw(0); + bluetooth_set_radiosw(0, 1); } else return -EINVAL; } @@ -2763,6 +2867,8 @@ enum { TP_ACPI_WANCARD_UNK = 0x04, /* unknown function */ }; +static struct rfkill *tpacpi_wan_rfkill; + static int wan_get_radiosw(void) { int status; @@ -2772,15 +2878,29 @@ static int wan_get_radiosw(void) /* WLSW overrides WWAN in firmware/hardware, reflect that */ if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) - return 0; + return RFKILL_STATE_HARD_BLOCKED; if (!acpi_evalf(hkey_handle, &status, "GWAN", "d")) return -EIO; - return (status & TP_ACPI_WANCARD_RADIOSSW) != 0; + return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0) ? + RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; } -static int wan_set_radiosw(int radio_on) +static void wan_update_rfk(void) +{ + int status; + + if (!tpacpi_wan_rfkill) + return; + + status = wan_get_radiosw(); + if (status < 0) + return; + rfkill_force_state(tpacpi_wan_rfkill, status); +} + +static int wan_set_radiosw(int radio_on, int update_rfk) { int status; @@ -2802,6 +2922,9 @@ static int wan_set_radiosw(int radio_on) if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status)) return -EIO; + if (update_rfk) + wan_update_rfk(); + return 0; } @@ -2816,7 +2939,8 @@ static ssize_t wan_enable_show(struct device *dev, if (status < 0) return status; - return snprintf(buf, PAGE_SIZE, "%d\n", status ? 1 : 0); + return snprintf(buf, PAGE_SIZE, "%d\n", + (status == RFKILL_STATE_UNBLOCKED) ? 1 : 0); } static ssize_t wan_enable_store(struct device *dev, @@ -2829,7 +2953,7 @@ static ssize_t wan_enable_store(struct device *dev, if (parse_strtoul(buf, 1, &t)) return -EINVAL; - res = wan_set_radiosw(t); + res = wan_set_radiosw(t, 1); return (res) ? res : count; } @@ -2849,8 +2973,27 @@ static const struct attribute_group wan_attr_group = { .attrs = wan_attributes, }; +static int tpacpi_wan_rfk_get(void *data, enum rfkill_state *state) +{ + int wans = wan_get_radiosw(); + + if (wans < 0) + return wans; + + *state = wans; + return 0; +} + +static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state) +{ + return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); +} + static void wan_exit(void) { + if (tpacpi_wan_rfkill) + rfkill_unregister(tpacpi_wan_rfkill); + sysfs_remove_group(&tpacpi_pdev->dev.kobj, &wan_attr_group); } @@ -2879,14 +3022,26 @@ static int __init wan_init(struct ibm_init_struct *iibm) "wan hardware not installed\n"); } - if (tp_features.wan) { - res = sysfs_create_group(&tpacpi_pdev->dev.kobj, + if (!tp_features.wan) + return 1; + + res = sysfs_create_group(&tpacpi_pdev->dev.kobj, &wan_attr_group); - if (res) - return res; + if (res) + return res; + + res = tpacpi_new_rfkill(TPACPI_RFK_WWAN_SW_ID, + &tpacpi_wan_rfkill, + RFKILL_TYPE_WWAN, + "tpacpi_wwan_sw", + tpacpi_wan_rfk_set, + tpacpi_wan_rfk_get); + if (res) { + wan_exit(); + return res; } - return (tp_features.wan)? 0 : 1; + return 0; } /* procfs -------------------------------------------------------------- */ @@ -2899,7 +3054,8 @@ static int wan_read(char *p) len += sprintf(p + len, "status:\t\tnot supported\n"); else { len += sprintf(p + len, "status:\t\t%s\n", - (status)? "enabled" : "disabled"); + (status == RFKILL_STATE_UNBLOCKED) ? + "enabled" : "disabled"); len += sprintf(p + len, "commands:\tenable, disable\n"); } @@ -2915,9 +3071,9 @@ static int wan_write(char *buf) while ((cmd = next_cmd(&buf))) { if (strlencmp(cmd, "enable") == 0) { - wan_set_radiosw(1); + wan_set_radiosw(1, 1); } else if (strlencmp(cmd, "disable") == 0) { - wan_set_radiosw(0); + wan_set_radiosw(0, 1); } else return -EINVAL; } -- cgit v1.2.3 From 490673dc98adfc7de1703cc88508902bd10f446b Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 21 Jul 2008 09:15:51 -0300 Subject: ACPI: thinkpad-acpi: bump up version to 0.21 rfkill support deserves a new version checkpoint... Signed-off-by: Henrique de Moraes Holschuh --- Documentation/laptops/thinkpad-acpi.txt | 4 ++-- drivers/misc/thinkpad_acpi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 1c1c0217ebd1..02dc748b76c4 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1,7 +1,7 @@ ThinkPad ACPI Extras Driver - Version 0.20 - April 09th, 2008 + Version 0.21 + May 29th, 2008 Borislav Deianov Henrique de Moraes Holschuh diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c index dc8d00a45701..3eb01afe4306 100644 --- a/drivers/misc/thinkpad_acpi.c +++ b/drivers/misc/thinkpad_acpi.c @@ -21,7 +21,7 @@ * 02110-1301, USA. */ -#define TPACPI_VERSION "0.20" +#define TPACPI_VERSION "0.21" #define TPACPI_SYSFS_VERSION 0x020200 /* -- cgit v1.2.3 From 47112e25da41d9059626033986dc3353e101f815 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 21 Jul 2008 13:35:08 -0700 Subject: udplite: Protection against coverage value wrap-around This patch clamps the cscov setsockopt values to a maximum of 0xFFFF. Setsockopt values greater than 0xffff can cause an unwanted wrap-around. Further, IPv6 jumbograms are not supported (RFC 3838, 3.5), so that values greater than 0xffff are not even useful. Further changes: fixed a typo in the documentation. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- Documentation/networking/udplite.txt | 2 +- net/ipv4/udp.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index 3870f280280b..855d8da57a23 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -148,7 +148,7 @@ getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...); is meaningless (as in TCP). Packets with a zero checksum field are - illegal (cf. RFC 3828, sec. 3.1) will be silently discarded. + illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded. 4) Fragmentation diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a751770947a3..383d17359d01 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1325,6 +1325,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1337,6 +1339,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; + else if (val > USHORT_MAX) + val = USHORT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; -- cgit v1.2.3 From 1ed6af73440c5ec920884bb800685a8cab4ce847 Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Fri, 18 Jul 2008 23:03:34 +1000 Subject: powerpc/cell: Add DMA_ATTR_WEAK_ORDERING dma attribute and use in Cell IOMMU code Introduce a new dma attriblue DMA_ATTR_WEAK_ORDERING to use weak ordering on DMA mappings in the Cell processor. Add the code to the Cell's IOMMU implementation to use this code. Dynamic mappings can be weakly or strongly ordered on an individual basis but the fixed mapping has to be either completely strong or completely weak. This is currently decided by a kernel boot option (pass iommu_fixed=weak for a weakly ordered fixed linear mapping, strongly ordered is the default). Signed-off-by: Mark Nelson Signed-off-by: Arnd Bergmann Signed-off-by: Benjamin Herrenschmidt --- Documentation/DMA-attributes.txt | 9 +++ arch/powerpc/platforms/cell/iommu.c | 113 ++++++++++++++++++++++++++++++++++-- include/linux/dma-attrs.h | 1 + 3 files changed, 118 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 6d772f84b477..b768cc0e402b 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -22,3 +22,12 @@ ready and available in memory. The DMA of the "completion indication" could race with data DMA. Mapping the memory used for completion indications with DMA_ATTR_WRITE_BARRIER would prevent the race. +DMA_ATTR_WEAK_ORDERING +---------------------- + +DMA_ATTR_WEAK_ORDERING specifies that reads and writes to the mapping +may be weakly ordered, that is that reads and writes may pass each other. + +Since it is optional for platforms to implement DMA_ATTR_WEAK_ORDERING, +those that do not will simply ignore the attribute and exhibit default +behavior. diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 3b7078453e7f..208005ca262c 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -199,6 +199,8 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages, base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW | (window->ioid & IOPTE_IOID_Mask); #endif + if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))) + base_pte &= ~IOPTE_SO_RW; io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); @@ -539,7 +541,9 @@ static struct cbe_iommu *cell_iommu_for_node(int nid) static unsigned long cell_dma_direct_offset; static unsigned long dma_iommu_fixed_base; -struct dma_mapping_ops dma_iommu_fixed_ops; + +/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ +static int iommu_fixed_is_weak; static struct iommu_table *cell_get_iommu_table(struct device *dev) { @@ -563,6 +567,98 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev) return &window->table; } +/* A coherent allocation implies strong ordering */ + +static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + if (iommu_fixed_is_weak) + return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), + size, dma_handle, + device_to_mask(dev), flag, + dev->archdata.numa_node); + else + return dma_direct_ops.alloc_coherent(dev, size, dma_handle, + flag); +} + +static void dma_fixed_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + if (iommu_fixed_is_weak) + iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, + dma_handle); + else + dma_direct_ops.free_coherent(dev, size, vaddr, dma_handle); +} + +static dma_addr_t dma_fixed_map_single(struct device *dev, void *ptr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_single(dev, ptr, size, direction, + attrs); + else + return iommu_map_single(dev, cell_get_iommu_table(dev), ptr, + size, device_to_mask(dev), direction, + attrs); +} + +static void dma_fixed_unmap_single(struct device *dev, dma_addr_t dma_addr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_single(dev, dma_addr, size, direction, + attrs); + else + iommu_unmap_single(cell_get_iommu_table(dev), dma_addr, size, + direction, attrs); +} + +static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); + else + return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents, + device_to_mask(dev), direction, attrs); +} + +static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); + else + iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction, + attrs); +} + +static int dma_fixed_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_64BIT_MASK; +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); + +struct dma_mapping_ops dma_iommu_fixed_ops = { + .alloc_coherent = dma_fixed_alloc_coherent, + .free_coherent = dma_fixed_free_coherent, + .map_single = dma_fixed_map_single, + .unmap_single = dma_fixed_unmap_single, + .map_sg = dma_fixed_map_sg, + .unmap_sg = dma_fixed_unmap_sg, + .dma_supported = dma_fixed_dma_supported, + .set_dma_mask = dma_set_mask_and_switch, +}; + static void cell_dma_dev_setup_fixed(struct device *dev); static void cell_dma_dev_setup(struct device *dev) @@ -919,9 +1015,16 @@ static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase); - base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW + base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | (cell_iommu_get_ioid(np) & IOPTE_IOID_Mask); + if (iommu_fixed_is_weak) + pr_info("IOMMU: Using weak ordering for fixed mapping\n"); + else { + pr_info("IOMMU: Using strong ordering for fixed mapping\n"); + base_pte |= IOPTE_SO_RW; + } + for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) { /* Don't touch the dynamic region */ ioaddr = uaddr + fbase; @@ -1037,9 +1140,6 @@ static int __init cell_iommu_fixed_mapping_init(void) cell_iommu_setup_window(iommu, np, dbase, dsize, 0); } - dma_iommu_fixed_ops = dma_direct_ops; - dma_iommu_fixed_ops.set_dma_mask = dma_set_mask_and_switch; - dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; set_pci_dma_ops(&dma_iommu_ops); @@ -1053,6 +1153,9 @@ static int __init setup_iommu_fixed(char *str) if (strcmp(str, "off") == 0) iommu_fixed_disabled = 1; + else if (strcmp(str, "weak") == 0) + iommu_fixed_is_weak = 1; + return 1; } __setup("iommu_fixed=", setup_iommu_fixed); diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h index 1677e2bfa00c..71ad34eca6e3 100644 --- a/include/linux/dma-attrs.h +++ b/include/linux/dma-attrs.h @@ -12,6 +12,7 @@ */ enum dma_attr { DMA_ATTR_WRITE_BARRIER, + DMA_ATTR_WEAK_ORDERING, DMA_ATTR_MAX, }; -- cgit v1.2.3 From e105b8bfc769b0545b6f0f395179d1e43cbee822 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 21 Apr 2008 10:51:07 -0700 Subject: sysfs: add /sys/dev/{char,block} to lookup sysfs path by major:minor Why?: There are occasions where userspace would like to access sysfs attributes for a device but it may not know how sysfs has named the device or the path. For example what is the sysfs path for /dev/disk/by-id/ata-ST3160827AS_5MT004CK? With this change a call to stat(2) returns the major:minor then userspace can see that /sys/dev/block/8:32 links to /sys/block/sdc. What are the alternatives?: 1/ Add an ioctl to return the path: Doable, but sysfs is meant to reduce the need to proliferate ioctl interfaces into the kernel, so this seems counter productive. 2/ Use udev to create these symlinks: Also doable, but it adds a udev dependency to utilities that might be running in a limited environment like an initramfs. 3/ Do a full-tree search of sysfs. [kay.sievers@vrfy.org: fix duplicate registrations] [kay.sievers@vrfy.org: cleanup suggestions] Cc: Neil Brown Cc: Tejun Heo Acked-by: Kay Sievers Reviewed-by: SL Baur Acked-by: Kay Sievers Acked-by: Mark Lord Acked-by: H. Peter Anvin Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-dev | 20 +++++++++ Documentation/filesystems/sysfs.txt | 6 +++ block/genhd.c | 5 ++- drivers/base/class.c | 4 ++ drivers/base/core.c | 83 ++++++++++++++++++++++++++++++++++++- drivers/usb/core/devio.c | 5 +++ include/linux/device.h | 3 ++ 7 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-dev (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-dev b/Documentation/ABI/testing/sysfs-dev new file mode 100644 index 000000000000..a9f2b8b0530f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-dev @@ -0,0 +1,20 @@ +What: /sys/dev +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Dan Williams +Description: The /sys/dev tree provides a method to look up the sysfs + path for a device using the information returned from + stat(2). There are two directories, 'block' and 'char', + beneath /sys/dev containing symbolic links with names of + the form ":". These links point to the + corresponding sysfs path for the given device. + + Example: + $ readlink /sys/dev/block/8:32 + ../../block/sdc + + Entries in /sys/dev/char and /sys/dev/block will be + dynamically created and destroyed as devices enter and + leave the system. + +Users: mdadm diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 7f27b8f840d0..9e9c348275a9 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -248,6 +248,7 @@ The top level sysfs directory looks like: block/ bus/ class/ +dev/ devices/ firmware/ net/ @@ -274,6 +275,11 @@ fs/ contains a directory for some filesystems. Currently each filesystem wanting to export attributes must create its own hierarchy below fs/ (see ./fuse.txt for an example). +dev/ contains two directories char/ and block/. Inside these two +directories there are symlinks named :. These symlinks +point to the sysfs directory for the given device. /sys/dev provides a +quick way to lookup the sysfs interface for a device from the result of +a stat(2) operation. More information can driver-model specific features can be found in Documentation/driver-model/. diff --git a/block/genhd.c b/block/genhd.c index 9074f384b097..24e3fc9095fe 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -370,7 +370,10 @@ static struct kobject *base_probe(dev_t devt, int *part, void *data) static int __init genhd_device_init(void) { - int error = class_register(&block_class); + int error; + + block_class.dev_kobj = sysfs_dev_block_kobj; + error = class_register(&block_class); if (unlikely(error)) return error; bdev_map = kobj_map_init(base_probe, &block_class_lock); diff --git a/drivers/base/class.c b/drivers/base/class.c index e085af0ff94f..71ce3ff6bdf5 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -148,6 +148,10 @@ int class_register(struct class *cls) if (error) return error; + /* set the default /sys/dev directory for devices of this class */ + if (!cls->dev_kobj) + cls->dev_kobj = sysfs_dev_char_kobj; + #if defined(CONFIG_SYSFS_DEPRECATED) && defined(CONFIG_BLOCK) /* let the block class directory show up in the root of sysfs */ if (cls != &block_class) diff --git a/drivers/base/core.c b/drivers/base/core.c index ee0a51a3a41d..be9aba4dc2fb 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -27,6 +27,9 @@ int (*platform_notify)(struct device *dev) = NULL; int (*platform_notify_remove)(struct device *dev) = NULL; +static struct kobject *dev_kobj; +struct kobject *sysfs_dev_char_kobj; +struct kobject *sysfs_dev_block_kobj; #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) @@ -775,6 +778,54 @@ int dev_set_name(struct device *dev, const char *fmt, ...) } EXPORT_SYMBOL_GPL(dev_set_name); +/** + * device_to_dev_kobj - select a /sys/dev/ directory for the device + * @dev: device + * + * By default we select char/ for new entries. Setting class->dev_obj + * to NULL prevents an entry from being created. class->dev_kobj must + * be set (or cleared) before any devices are registered to the class + * otherwise device_create_sys_dev_entry() and + * device_remove_sys_dev_entry() will disagree about the the presence + * of the link. + */ +static struct kobject *device_to_dev_kobj(struct device *dev) +{ + struct kobject *kobj; + + if (dev->class) + kobj = dev->class->dev_kobj; + else + kobj = sysfs_dev_char_kobj; + + return kobj; +} + +static int device_create_sys_dev_entry(struct device *dev) +{ + struct kobject *kobj = device_to_dev_kobj(dev); + int error = 0; + char devt_str[15]; + + if (kobj) { + format_dev_t(devt_str, dev->devt); + error = sysfs_create_link(kobj, &dev->kobj, devt_str); + } + + return error; +} + +static void device_remove_sys_dev_entry(struct device *dev) +{ + struct kobject *kobj = device_to_dev_kobj(dev); + char devt_str[15]; + + if (kobj) { + format_dev_t(devt_str, dev->devt); + sysfs_remove_link(kobj, devt_str); + } +} + /** * device_add - add device to device hierarchy. * @dev: device. @@ -829,6 +880,10 @@ int device_add(struct device *dev) error = device_create_file(dev, &devt_attr); if (error) goto ueventattrError; + + error = device_create_sys_dev_entry(dev); + if (error) + goto devtattrError; } error = device_add_class_symlinks(dev); @@ -872,6 +927,9 @@ int device_add(struct device *dev) AttrsError: device_remove_class_symlinks(dev); SymlinkError: + if (MAJOR(dev->devt)) + device_remove_sys_dev_entry(dev); + devtattrError: if (MAJOR(dev->devt)) device_remove_file(dev, &devt_attr); ueventattrError: @@ -948,8 +1006,10 @@ void device_del(struct device *dev) device_pm_remove(dev); if (parent) klist_del(&dev->knode_parent); - if (MAJOR(dev->devt)) + if (MAJOR(dev->devt)) { + device_remove_sys_dev_entry(dev); device_remove_file(dev, &devt_attr); + } if (dev->class) { device_remove_class_symlinks(dev); @@ -1074,7 +1134,25 @@ int __init devices_init(void) devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); if (!devices_kset) return -ENOMEM; + dev_kobj = kobject_create_and_add("dev", NULL); + if (!dev_kobj) + goto dev_kobj_err; + sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); + if (!sysfs_dev_block_kobj) + goto block_kobj_err; + sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); + if (!sysfs_dev_char_kobj) + goto char_kobj_err; + return 0; + + char_kobj_err: + kobject_put(sysfs_dev_block_kobj); + block_kobj_err: + kobject_put(dev_kobj); + dev_kobj_err: + kset_unregister(devices_kset); + return -ENOMEM; } EXPORT_SYMBOL_GPL(device_for_each_child); @@ -1447,4 +1525,7 @@ void device_shutdown(void) dev->driver->shutdown(dev); } } + kobject_put(sysfs_dev_char_kobj); + kobject_put(sysfs_dev_block_kobj); + kobject_put(dev_kobj); } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 54a350ccd033..6fbc8f5ab80c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1792,6 +1792,11 @@ int __init usb_devio_init(void) usb_classdev_class = NULL; goto out; } + /* devices of this class shadow the major:minor of their parent + * device, so clear ->dev_kobj to prevent adding duplicate entries + * to /sys/dev + */ + usb_classdev_class->dev_kobj = NULL; usb_register_notify(&usbdev_nb); #endif diff --git a/include/linux/device.h b/include/linux/device.h index f71a78d123ae..e49aa74f248c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -193,6 +193,7 @@ struct class { struct semaphore sem; /* locks children, devices, interfaces */ struct class_attribute *class_attrs; struct device_attribute *dev_attrs; + struct kobject *dev_kobj; int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env); @@ -205,6 +206,8 @@ struct class { struct pm_ops *pm; }; +extern struct kobject *sysfs_dev_block_kobj; +extern struct kobject *sysfs_dev_char_kobj; extern int __must_check class_register(struct class *class); extern void class_unregister(struct class *class); extern int class_for_each_device(struct class *class, void *data, -- cgit v1.2.3 From 328a14e70e7f46997cb50d4258dd93d5377f98c6 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Fri, 23 May 2008 13:50:14 +0200 Subject: UIO: Add write function to allow irq masking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes it is necessary to enable/disable the interrupt of a UIO device from the userspace part of the driver. With this patch, the UIO kernel driver can implement an "irqcontrol()" function that does this. Userspace can write an s32 value to /dev/uioX (usually 0 or 1 to turn the irq off or on). The UIO core will then call the driver's irqcontrol function. Signed-off-by: Hans J. Koch Acked-by: Uwe Kleine-König Acked-by: Magnus Damm Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/uio-howto.tmpl | 40 +++++++++++++++++++++++++++++++++++- drivers/uio/uio.c | 26 +++++++++++++++++++++++ include/linux/uio_driver.h | 2 ++ 3 files changed, 67 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index fdd7f4f887b7..c4d187313963 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -29,6 +29,12 @@ + + 0.5 + 2008-05-22 + hjk + Added description of write() function. + 0.4 2007-11-26 @@ -64,7 +70,7 @@ Copyright and License - Copyright (c) 2006 by Hans-Jürgen Koch. + Copyright (c) 2006-2008 by Hans-Jürgen Koch. This documentation is Free Software licensed under the terms of the GPL version 2. @@ -189,6 +195,30 @@ interested in translating it, please email me represents the total interrupt count. You can use this number to figure out if you missed some interrupts. + + For some hardware that has more than one interrupt source internally, + but not separate IRQ mask and status registers, there might be + situations where userspace cannot determine what the interrupt source + was if the kernel handler disables them by writing to the chip's IRQ + register. In such a case, the kernel has to disable the IRQ completely + to leave the chip's register untouched. Now the userspace part can + determine the cause of the interrupt, but it cannot re-enable + interrupts. Another cornercase is chips where re-enabling interrupts + is a read-modify-write operation to a combined IRQ status/acknowledge + register. This would be racy if a new interrupt occurred + simultaneously. + + + To address these problems, UIO also implements a write() function. It + is normally not used and can be ignored for hardware that has only a + single interrupt source or has separate IRQ mask and status registers. + If you need it, however, a write to /dev/uioX + will call the irqcontrol() function implemented + by the driver. You have to write a 32-bit value that is usually either + 0 or 1 to disable or enable interrupts. If a driver does not implement + irqcontrol(), write() will + return with -ENOSYS. + To handle interrupts properly, your custom kernel module can @@ -362,6 +392,14 @@ device is actually used. open(), you will probably also want a custom release() function. + + +int (*irqcontrol)(struct uio_info *info, s32 irq_on) +: Optional. If you need to be able to enable or disable +interrupts from userspace by writing to /dev/uioX, +you can implement this function. The parameter irq_on +will be 0 to disable interrupts and 1 to enable them. + diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 5a7ca2e6094d..3a6934bf7131 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -427,6 +427,31 @@ static ssize_t uio_read(struct file *filep, char __user *buf, return retval; } +static ssize_t uio_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct uio_listener *listener = filep->private_data; + struct uio_device *idev = listener->dev; + ssize_t retval; + s32 irq_on; + + if (idev->info->irq == UIO_IRQ_NONE) + return -EIO; + + if (count != sizeof(s32)) + return -EINVAL; + + if (!idev->info->irqcontrol) + return -ENOSYS; + + if (copy_from_user(&irq_on, buf, count)) + return -EFAULT; + + retval = idev->info->irqcontrol(idev->info, irq_on); + + return retval ? retval : sizeof(s32); +} + static int uio_find_mem_index(struct vm_area_struct *vma) { int mi; @@ -546,6 +571,7 @@ static const struct file_operations uio_fops = { .open = uio_open, .release = uio_release, .read = uio_read, + .write = uio_write, .mmap = uio_mmap, .poll = uio_poll, .fasync = uio_fasync, diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 973386d439da..cf65e964102b 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -53,6 +53,7 @@ struct uio_device; * @mmap: mmap operation for this uio device * @open: open operation for this uio device * @release: release operation for this uio device + * @irqcontrol: disable/enable irqs when 0/1 is written to /dev/uioX */ struct uio_info { struct uio_device *uio_dev; @@ -66,6 +67,7 @@ struct uio_info { int (*mmap)(struct uio_info *info, struct vm_area_struct *vma); int (*open)(struct uio_info *info, struct inode *inode); int (*release)(struct uio_info *info, struct inode *inode); + int (*irqcontrol)(struct uio_info *info, s32 irq_on); }; extern int __must_check -- cgit v1.2.3 From 17149d9fff18c4811349140934dc541f70c617df Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 24 Jun 2008 14:24:57 -0700 Subject: uio-howto.tmpl: use standard copyright/legal markings The Userspace I/O HOWTO document uses straight tags and plain text to describe copyright/legal information. It should instead use the and tags like all other documents in the kernel. Signed-off-by: Mike Frysinger Acked-by: Hans J. Koch Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/uio-howto.tmpl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index c4d187313963..d799a2621e20 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -21,6 +21,18 @@ + + 2006-2008 + Hans-Jürgen Koch. + + + + +This documentation is Free Software licensed under the terms of the +GPL version 2. + + + 2006-12-11 @@ -66,17 +78,6 @@ About this document - - -Copyright and License - - Copyright (c) 2006-2008 by Hans-Jürgen Koch. - -This documentation is Free Software licensed under the terms of the -GPL version 2. - - - Translations -- cgit v1.2.3 From 4f7e53096c93f0bdf2205134dfc541d0c3cc6e41 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 24 Jun 2008 14:25:00 -0700 Subject: uio-howto.tmpl: use unique output names The Userspace I/O HOWTO template sets two different sections with the same html output name (about.html). This clearly won't work, so change the first one to a unique "aboutthis.html" to prevent clobbering. Signed-off-by: Mike Frysinger Acked-by: Hans J. Koch Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/uio-howto.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl index d799a2621e20..df87d1b93605 100644 --- a/Documentation/DocBook/uio-howto.tmpl +++ b/Documentation/DocBook/uio-howto.tmpl @@ -75,7 +75,7 @@ GPL version 2. - + About this document -- cgit v1.2.3 From 43166141f73f969794bd7c850c89913631df99e4 Mon Sep 17 00:00:00 2001 From: Tsugikazu Shibata Date: Fri, 20 Jun 2008 10:59:52 +0900 Subject: HOWTO: change email addresses of James in HOWTO Signed-off-by: Tsugikazu Shibata Signed-off-by: Greg Kroah-Hartman --- Documentation/HOWTO | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 619e8caf30db..c2371c5a98f9 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -358,7 +358,7 @@ Here is a list of some of the different kernel trees available: - pcmcia, Dominik Brodowski git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git - - SCSI, James Bottomley + - SCSI, James Bottomley git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git - x86, Ingo Molnar -- cgit v1.2.3 From 83c79b55f0d929a0dcf2b0d347cd1875afc06f21 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 2 Jul 2008 10:21:30 -0700 Subject: sysfs-rules.txt: reword API stability statement The first paragraph of this document implies that user space developers shouldn't use sysfs at all, but then it goes on to describe rules that developers should follow when accessing sysfs. Not only is this somewhat self-contradictory, it has been shown to discourage developers from using established sysfs interfaces. A note of caution is more appropriate than a blanket "sysfs will never be stable" assertion. Signed-off-by: Nathan Lynch Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- Documentation/sysfs-rules.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt index 80ef562160bb..6049a2a84dda 100644 --- a/Documentation/sysfs-rules.txt +++ b/Documentation/sysfs-rules.txt @@ -3,9 +3,8 @@ Rules on how to access information in the Linux kernel sysfs The kernel-exported sysfs exports internal kernel implementation details and depends on internal kernel structures and layout. It is agreed upon by the kernel developers that the Linux kernel does not provide a stable -internal API. As sysfs is a direct export of kernel internal -structures, the sysfs interface cannot provide a stable interface either; -it may always change along with internal kernel changes. +internal API. Therefore, there are aspects of the sysfs interface that +may not be stable across kernel releases. To minimize the risk of breaking users of sysfs, which are in most cases low-level userspace applications, with a new kernel release, the users -- cgit v1.2.3 From a81792f668c20540c336af4242ba1400763eb14f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Jul 2008 19:00:25 +0200 Subject: remove mention of CONFIG_KMOD from documentation Also includes a few Kconfig files (xtensa, blackfin) Signed-off-by: Johannes Berg Cc: Michael Kerrisk Cc: linux-doc@vger.kernel.org Signed-off-by: Rusty Russell Acked-by: Randy Dunlap --- Documentation/filesystems/bfs.txt | 10 +++++----- Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl | 2 +- Documentation/telephony/ixj.txt | 13 +++---------- Documentation/video4linux/w9968cf.txt | 3 --- arch/blackfin/Kconfig | 4 ++-- arch/xtensa/Kconfig | 4 ++-- 6 files changed, 13 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt index ea825e178e79..78043d5a8fc3 100644 --- a/Documentation/filesystems/bfs.txt +++ b/Documentation/filesystems/bfs.txt @@ -26,11 +26,11 @@ You can simplify mounting by just typing: this will allocate the first available loopback device (and load loop.o kernel module if necessary) automatically. If the loopback driver is not -loaded automatically, make sure that your kernel is compiled with kmod -support (CONFIG_KMOD) enabled. Beware that umount will not -deallocate /dev/loopN device if /etc/mtab file on your system is a -symbolic link to /proc/mounts. You will need to do it manually using -"-d" switch of losetup(8). Read losetup(8) manpage for more info. +loaded automatically, make sure that you have compiled the module and +that modprobe is functioning. Beware that umount will not deallocate +/dev/loopN device if /etc/mtab file on your system is a symbolic link to +/proc/mounts. You will need to do it manually using "-d" switch of +losetup(8). Read losetup(8) manpage for more info. To create the BFS image under UnixWare you need to find out first which slice contains it. The command prtvtoc(1M) is your friend: diff --git a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl b/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl index c4d2e3507af9..9d644f7e241e 100644 --- a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl +++ b/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl @@ -42,7 +42,7 @@ Device Components !Esound/core/device.c - KMOD and Device File Entries + Module requests and Device File Entries !Esound/core/sound.c Memory Management Helpers diff --git a/Documentation/telephony/ixj.txt b/Documentation/telephony/ixj.txt index 621024fd3a18..44d124005bad 100644 --- a/Documentation/telephony/ixj.txt +++ b/Documentation/telephony/ixj.txt @@ -305,21 +305,14 @@ driver, like this: which will result in the needed drivers getting loaded automatically. - g. if you are planning on using kerneld to automatically load the -module for you, then you need to edit /etc/conf.modules and add the + g. if you are planning on having the kernel automatically request +the module for you, then you need to edit /etc/conf.modules and add the following lines: options ixj dspio=0x340 xio=0x330 ixjdebug=0 If you do this, then when you execute an application that uses the -module kerneld will load the module for you. Note that to do this, -you need to have your kernel set to support kerneld. You can check -for this by looking at /usr/src/linux/.config and you should see this: - - # Loadable module support - # - - CONFIG_KMOD=y +module the kernel will request that it is loaded. h. if you want non-root users to be able to read and write to the ixj devices (this is a good idea!) you should do the following: diff --git a/Documentation/video4linux/w9968cf.txt b/Documentation/video4linux/w9968cf.txt index e0bba8393c77..05138e8aea07 100644 --- a/Documentation/video4linux/w9968cf.txt +++ b/Documentation/video4linux/w9968cf.txt @@ -193,9 +193,6 @@ Description: Automatic 'ovcamchip' module loading: 0 disabled, 1 enabled. loads that module automatically. This action is performed as once soon as the 'w9968cf' module is loaded into memory. Default: 1 -Note: The kernel must be compiled with the CONFIG_KMOD option - enabled for the 'ovcamchip' module to be loaded and for - this parameter to be present. ------------------------------------------------------------------------------- Name: simcams Type: int diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index b87634e75f20..b83b8ef84e91 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -873,8 +873,8 @@ config HOTPLUG plugged into slots found on all modern laptop computers. Another example, used on modern desktops as well as laptops, is USB. - Enable HOTPLUG and KMOD, and build a modular kernel. Get agent - software (at ) and install it. + Enable HOTPLUG and build a modular kernel. Get agent software + (from ) and install it. Then your kernel will automatically call out to a user mode "policy agent" (/sbin/hotplug) to load modules and set up software needed to use devices as you hotplug them. diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 9fc8551a1cf6..02e417d3d8e9 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -194,8 +194,8 @@ config HOTPLUG plugged into slots found on all modern laptop computers. Another example, used on modern desktops as well as laptops, is USB. - Enable HOTPLUG and KMOD, and build a modular kernel. Get agent - software (at ) and install it. + Enable HOTPLUG and build a modular kernel. Get agent software + (from ) and install it. Then your kernel will automatically call out to a user mode "policy agent" (/sbin/hotplug) to load modules and set up software needed to use devices as you hotplug them. -- cgit v1.2.3 From d2fbd0f2f9f5a34831a0b8fe6b16c6e1afba1200 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 22 Jul 2008 11:17:16 +0100 Subject: specialix: Code cleanups Go through the inlines and other oddments that are iffy. Remove various bits of dead code and bogus debug. Turn the crtsdts compile time option into a runtime switch. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- Documentation/specialix.txt | 8 +- drivers/char/Kconfig | 10 -- drivers/char/specialix.c | 216 +++++++++----------------------------------- 3 files changed, 47 insertions(+), 187 deletions(-) (limited to 'Documentation') diff --git a/Documentation/specialix.txt b/Documentation/specialix.txt index 4a4b428ce8f6..6eb6f3a3331c 100644 --- a/Documentation/specialix.txt +++ b/Documentation/specialix.txt @@ -270,8 +270,8 @@ The pinout of the connectors on the IO8+ is: Hardware handshaking issues. ============================ -The driver can be compiled in two different ways. The default -("Specialix DTR/RTS pin is RTS" is off) the pin behaves as DTR when +The driver can be told to operate in two different ways. The default +behaviour is specialix.sx_rtscts = 0 where the pin behaves as DTR when hardware handshaking is off. It behaves as the RTS hardware handshaking signal when hardware handshaking is selected. @@ -280,7 +280,7 @@ cable will either be compatible with hardware handshaking or with software handshaking. So switching on the fly is not really an option. -I actually prefer to use the "Specialix DTR/RTS pin is RTS" option. +I actually prefer to use the "specialix.sx_rtscts=1" option. This makes the DTR/RTS pin always an RTS pin, and ioctls to change DTR are always ignored. I have a cable that is configured for this. @@ -379,7 +379,5 @@ it doesn't fit in your computer, bring back the card. You have to WRITE to the address register to even read-probe a CD186x register. Disable autodetection? -- Specialix: any suggestions? - - Arbitrary baud rates are not implemented yet. - If you need this, bug me about it. diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 650e6b44ce65..e0bbbfb6a36b 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -300,16 +300,6 @@ config SPECIALIX and compile this driver as kernel loadable module which will be called specialix. -config SPECIALIX_RTSCTS - bool "Specialix DTR/RTS pin is RTS" - depends on SPECIALIX - help - The Specialix IO8+ card can only support either RTS or DTR. If you - say N here, the driver will use the pin as "DTR" when the tty is in - software handshake mode. If you say Y here or hardware handshake is - on, it will always be RTS. Read the file - for more information. - config SX tristate "Specialix SX (and SI) card support" depends on SERIAL_NONSTANDARD && (PCI || EISA || ISA) diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index c390c6cc030e..f3184a8a75d6 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -110,6 +110,7 @@ static int sx_debug; static int sx_rxfifo = SPECIALIX_RXFIFO; +static int sx_rtscts; #ifdef DEBUG #define dprintk(f, str...) if (sx_debug & f) printk(str) @@ -134,25 +135,12 @@ static int sx_rxfifo = SPECIALIX_RXFIFO; #define func_enter() dprintk(SX_DEBUG_FLOW, "io8: enter %s\n", __func__) #define func_exit() dprintk(SX_DEBUG_FLOW, "io8: exit %s\n", __func__) -#define jiffies_from_ms(a) ((((a) * HZ)/1000)+1) - /* Configurable options: */ /* Am I paranoid or not ? ;-) */ #define SPECIALIX_PARANOIA_CHECK -/* Do I trust the IRQ from the card? (enabeling it doesn't seem to help) - When the IRQ routine leaves the chip in a state that is keeps on - requiring attention, the timer doesn't help either. */ -#undef SPECIALIX_TIMER - -#ifdef SPECIALIX_TIMER -static int sx_poll = HZ; -#endif - - - /* * The following defines are mostly for testing purposes. But if you need * some nice reporting in your syslog, you can define them also. @@ -162,16 +150,6 @@ static int sx_poll = HZ; -#ifdef CONFIG_SPECIALIX_RTSCTS -#define SX_CRTSCTS(bla) 1 -#else -#define SX_CRTSCTS(tty) C_CRTSCTS(tty) -#endif - - -/* Used to be outb(0xff, 0x80); */ -#define short_pause() udelay(1) - #define SPECIALIX_LEGAL_FLAGS \ (ASYNC_HUP_NOTIFY | ASYNC_SAK | ASYNC_SPLIT_TERMIOS | \ @@ -190,14 +168,7 @@ static struct specialix_board sx_board[SX_NBOARD] = { static struct specialix_port sx_port[SX_NBOARD * SX_NPORT]; -#ifdef SPECIALIX_TIMER -static struct timer_list missed_irq_timer; -static irqreturn_t sx_interrupt(int irq, void *dev_id); -#endif - - - -static inline int sx_paranoia_check(struct specialix_port const *port, +static int sx_paranoia_check(struct specialix_port const *port, char *name, const char *routine) { #ifdef SPECIALIX_PARANOIA_CHECK @@ -288,7 +259,7 @@ static inline void sx_out_off(struct specialix_board *bp, /* Wait for Channel Command Register ready */ -static inline void sx_wait_CCR(struct specialix_board *bp) +static void sx_wait_CCR(struct specialix_board *bp) { unsigned long delay, flags; unsigned char ccr; @@ -307,7 +278,7 @@ static inline void sx_wait_CCR(struct specialix_board *bp) /* Wait for Channel Command Register ready */ -static inline void sx_wait_CCR_off(struct specialix_board *bp) +static void sx_wait_CCR_off(struct specialix_board *bp) { unsigned long delay; unsigned char crr; @@ -330,7 +301,7 @@ static inline void sx_wait_CCR_off(struct specialix_board *bp) * specialix IO8+ IO range functions. */ -static inline int sx_request_io_range(struct specialix_board *bp) +static int sx_request_io_range(struct specialix_board *bp) { return request_region(bp->base, bp->flags & SX_BOARD_IS_PCI ? SX_PCI_IO_SPACE : SX_IO_SPACE, @@ -338,7 +309,7 @@ static inline int sx_request_io_range(struct specialix_board *bp) } -static inline void sx_release_io_range(struct specialix_board *bp) +static void sx_release_io_range(struct specialix_board *bp) { release_region(bp->base, bp->flags & SX_BOARD_IS_PCI ? SX_PCI_IO_SPACE : SX_IO_SPACE); @@ -440,36 +411,10 @@ static int read_cross_byte(struct specialix_board *bp, int reg, int bit) } -#ifdef SPECIALIX_TIMER -void missed_irq(unsigned long data) -{ - unsigned char irq; - unsigned long flags; - struct specialix_board *bp = (struct specialix_board *)data; - - spin_lock_irqsave(&bp->lock, flags); - irq = sx_in((struct specialix_board *)data, CD186x_SRSR) & - (SRSR_RREQint | SRSR_TREQint | SRSR_MREQint); - spin_unlock_irqrestore(&bp->lock, flags); - if (irq) { - printk(KERN_INFO - "Missed interrupt... Calling int from timer. \n"); - sx_interrupt(-1, bp); - } - mod_timer(&missed_irq_timer, jiffies + sx_poll); -} -#endif - - - /* Main probing routine, also sets irq. */ static int sx_probe(struct specialix_board *bp) { unsigned char val1, val2; -#if 0 - int irqs = 0; - int retries; -#endif int rev; int chip; @@ -482,15 +427,15 @@ static int sx_probe(struct specialix_board *bp) /* Are the I/O ports here ? */ sx_out_off(bp, CD186x_PPRL, 0x5a); - short_pause(); + udelay(1); val1 = sx_in_off(bp, CD186x_PPRL); sx_out_off(bp, CD186x_PPRL, 0xa5); - short_pause(); + udelay(1); val2 = sx_in_off(bp, CD186x_PPRL); - if ((val1 != 0x5a) || (val2 != 0xa5)) { + if (val1 != 0x5a || val2 != 0xa5) { printk(KERN_INFO "sx%d: specialix IO8+ Board at 0x%03x not found.\n", board_No(bp), bp->base); @@ -522,50 +467,6 @@ static int sx_probe(struct specialix_board *bp) } -#if 0 - /* It's time to find IRQ for this board */ - for (retries = 0; retries < 5 && irqs <= 0; retries++) { - irqs = probe_irq_on(); - sx_init_CD186x(bp); /* Reset CD186x chip */ - sx_out(bp, CD186x_CAR, 2); /* Select port 2 */ - sx_wait_CCR(bp); - sx_out(bp, CD186x_CCR, CCR_TXEN); /* Enable transmitter */ - sx_out(bp, CD186x_IER, IER_TXRDY); /* Enable tx empty intr */ - msleep(50); - irqs = probe_irq_off(irqs); - - dprintk(SX_DEBUG_INIT, "SRSR = %02x, ", sx_in(bp, CD186x_SRSR)); - dprintk(SX_DEBUG_INIT, "TRAR = %02x, ", sx_in(bp, CD186x_TRAR)); - dprintk(SX_DEBUG_INIT, "GIVR = %02x, ", sx_in(bp, CD186x_GIVR)); - dprintk(SX_DEBUG_INIT, "GICR = %02x, ", sx_in(bp, CD186x_GICR)); - dprintk(SX_DEBUG_INIT, "\n"); - - /* Reset CD186x again */ - if (!sx_init_CD186x(bp)) { - /* Hmmm. This is dead code anyway. */ - } - - dprintk(SX_DEBUG_INIT - "val1 = %02x, val2 = %02x, val3 = %02x.\n", - val1, val2, val3); - - } - -#if 0 - if (irqs <= 0) { - printk(KERN_ERR - "sx%d: Can't find IRQ for specialix IO8+ board at 0x%03x.\n", - board_No(bp), bp->base); - sx_release_io_range(bp); - func_exit(); - return 1; - } -#endif - printk(KERN_INFO "Started with irq=%d, but now have irq=%d.\n", - bp->irq, irqs); - if (irqs > 0) - bp->irq = irqs; -#endif /* Reset CD186x again */ if (!sx_init_CD186x(bp)) { sx_release_io_range(bp); @@ -610,11 +511,6 @@ static int sx_probe(struct specialix_board *bp) dprintk(SX_DEBUG_INIT, " GFCR = 0x%02x\n", sx_in_off(bp, CD186x_GFRCR)); -#ifdef SPECIALIX_TIMER - setup_timer(&missed_irq_timer, missed_irq, (unsigned long)bp); - mod_timer(&missed_irq_timer, jiffies + sx_poll); -#endif - printk(KERN_INFO "sx%d: specialix IO8+ board detected at 0x%03x, IRQ %d, CD%d Rev. %c.\n", board_No(bp), bp->base, bp->irq, chip, rev); @@ -628,7 +524,7 @@ static int sx_probe(struct specialix_board *bp) * Interrupt processing routines. * */ -static inline struct specialix_port *sx_get_port(struct specialix_board *bp, +static struct specialix_port *sx_get_port(struct specialix_board *bp, unsigned char const *what) { unsigned char channel; @@ -654,7 +550,7 @@ static inline struct specialix_port *sx_get_port(struct specialix_board *bp, } -static inline void sx_receive_exc(struct specialix_board *bp) +static void sx_receive_exc(struct specialix_board *bp) { struct specialix_port *port; struct tty_struct *tty; @@ -729,7 +625,7 @@ static inline void sx_receive_exc(struct specialix_board *bp) } -static inline void sx_receive(struct specialix_board *bp) +static void sx_receive(struct specialix_board *bp) { struct specialix_port *port; struct tty_struct *tty; @@ -758,7 +654,7 @@ static inline void sx_receive(struct specialix_board *bp) } -static inline void sx_transmit(struct specialix_board *bp) +static void sx_transmit(struct specialix_board *bp) { struct specialix_port *port; struct tty_struct *tty; @@ -838,7 +734,7 @@ static inline void sx_transmit(struct specialix_board *bp) } -static inline void sx_check_modem(struct specialix_board *bp) +static void sx_check_modem(struct specialix_board *bp) { struct specialix_port *port; struct tty_struct *tty; @@ -853,7 +749,6 @@ static inline void sx_check_modem(struct specialix_board *bp) tty = port->port.tty; mcr = sx_in(bp, CD186x_MCR); - printk("mcr = %02x.\n", mcr); /* FIXME */ if ((mcr & MCR_CDCHG)) { dprintk(SX_DEBUG_SIGNALS, "CD just changed... "); @@ -983,13 +878,6 @@ static void turn_ints_off(struct specialix_board *bp) unsigned long flags; func_enter(); - if (bp->flags & SX_BOARD_IS_PCI) { - /* This was intended for enabeling the interrupt on the - * PCI card. However it seems that it's already enabled - * and as PCI interrupts can be shared, there is no real - * reason to have to turn it off. */ - } - spin_lock_irqsave(&bp->lock, flags); (void) sx_in_off(bp, 0); /* Turn off interrupts. */ spin_unlock_irqrestore(&bp->lock, flags); @@ -1003,9 +891,6 @@ static void turn_ints_on(struct specialix_board *bp) func_enter(); - if (bp->flags & SX_BOARD_IS_PCI) { - /* play with the PCI chip. See comment above. */ - } spin_lock_irqsave(&bp->lock, flags); (void) sx_in(bp, 0); /* Turn ON interrupts. */ spin_unlock_irqrestore(&bp->lock, flags); @@ -1015,7 +900,7 @@ static void turn_ints_on(struct specialix_board *bp) /* Called with disabled interrupts */ -static inline int sx_setup_board(struct specialix_board *bp) +static int sx_setup_board(struct specialix_board *bp) { int error; @@ -1040,7 +925,7 @@ static inline int sx_setup_board(struct specialix_board *bp) /* Called with disabled interrupts */ -static inline void sx_shutdown_board(struct specialix_board *bp) +static void sx_shutdown_board(struct specialix_board *bp) { func_enter(); @@ -1058,6 +943,12 @@ static inline void sx_shutdown_board(struct specialix_board *bp) func_exit(); } +static unsigned int sx_crtscts(struct tty_struct *tty) +{ + if (sx_rtscts) + return C_CRTSCTS(tty); + return 1; +} /* * Setting up port characteristics. @@ -1090,7 +981,7 @@ static void sx_change_speed(struct specialix_board *bp, /* The Specialix board doens't implement the RTS lines. They are used to set the IRQ level. Don't touch them. */ - if (SX_CRTSCTS(tty)) + if (sx_crtscts(tty)) port->MSVR = MSVR_DTR | (sx_in(bp, CD186x_MSVR) & MSVR_RTS); else port->MSVR = (sx_in(bp, CD186x_MSVR) & MSVR_RTS); @@ -1108,7 +999,7 @@ static void sx_change_speed(struct specialix_board *bp, if (!baud) { /* Drop DTR & exit */ dprintk(SX_DEBUG_TERMIOS, "Dropping DTR... Hmm....\n"); - if (!SX_CRTSCTS(tty)) { + if (!sx_crtscts(tty)) { port->MSVR &= ~MSVR_DTR; spin_lock_irqsave(&bp->lock, flags); sx_out(bp, CD186x_MSVR, port->MSVR); @@ -1118,7 +1009,7 @@ static void sx_change_speed(struct specialix_board *bp, return; } else { /* Set DTR on */ - if (!SX_CRTSCTS(tty)) + if (!sx_crtscts(tty)) port->MSVR |= MSVR_DTR; } @@ -1448,7 +1339,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, spin_lock_irqsave(&bp->lock, flags); sx_out(bp, CD186x_CAR, port_No(port)); CD = sx_in(bp, CD186x_MSVR) & MSVR_CD; - if (SX_CRTSCTS(tty)) { + if (sx_crtscts(tty)) { /* Activate RTS */ port->MSVR |= MSVR_DTR; /* WTF? */ sx_out(bp, CD186x_MSVR, port->MSVR); @@ -1600,7 +1491,7 @@ static void sx_close(struct tty_struct *tty, struct file *filp) } bp = port_Board(port); - if ((tty->count == 1) && (port->port.count != 1)) { + if (tty->count == 1 && port->port.count != 1) { printk(KERN_ERR "sx%d: sx_close: bad port count;" " tty->count is 1, port count is %d\n", board_No(bp), port->port.count); @@ -1868,17 +1759,15 @@ static int sx_tiocmget(struct tty_struct *tty, struct file *file) dprintk(SX_DEBUG_INIT, "Got msvr[%d] = %02x, car = %d.\n", port_No(port), status, sx_in(bp, CD186x_CAR)); dprintk(SX_DEBUG_INIT, "sx_port = %p, port = %p\n", sx_port, port); - if (SX_CRTSCTS(port->port.tty)) { - result = /* (status & MSVR_RTS) ? */ TIOCM_DTR /* : 0) */ + if (sx_crtscts(port->port.tty)) { + result = TIOCM_DTR | TIOCM_DSR | ((status & MSVR_DTR) ? TIOCM_RTS : 0) | ((status & MSVR_CD) ? TIOCM_CAR : 0) - |/* ((status & MSVR_DSR) ? */ TIOCM_DSR /* : 0) */ | ((status & MSVR_CTS) ? TIOCM_CTS : 0); } else { - result = /* (status & MSVR_RTS) ? */ TIOCM_RTS /* : 0) */ + result = TIOCM_RTS | TIOCM_DSR | ((status & MSVR_DTR) ? TIOCM_DTR : 0) | ((status & MSVR_CD) ? TIOCM_CAR : 0) - |/* ((status & MSVR_DSR) ? */ TIOCM_DSR /* : 0) */ | ((status & MSVR_CTS) ? TIOCM_CTS : 0); } @@ -1905,24 +1794,14 @@ static int sx_tiocmset(struct tty_struct *tty, struct file *file, bp = port_Board(port); spin_lock_irqsave(&port->lock, flags); - /* if (set & TIOCM_RTS) - port->MSVR |= MSVR_RTS; */ - /* if (set & TIOCM_DTR) - port->MSVR |= MSVR_DTR; */ - - if (SX_CRTSCTS(port->port.tty)) { + if (sx_crtscts(port->port.tty)) { if (set & TIOCM_RTS) port->MSVR |= MSVR_DTR; } else { if (set & TIOCM_DTR) port->MSVR |= MSVR_DTR; } - - /* if (clear & TIOCM_RTS) - port->MSVR &= ~MSVR_RTS; */ - /* if (clear & TIOCM_DTR) - port->MSVR &= ~MSVR_DTR; */ - if (SX_CRTSCTS(port->port.tty)) { + if (sx_crtscts(port->port.tty)) { if (clear & TIOCM_RTS) port->MSVR &= ~MSVR_DTR; } else { @@ -1939,7 +1818,7 @@ static int sx_tiocmset(struct tty_struct *tty, struct file *file, } -static inline void sx_send_break(struct specialix_port *port, +static void sx_send_break(struct specialix_port *port, unsigned long length) { struct specialix_board *bp = port_Board(port); @@ -1967,7 +1846,7 @@ static inline void sx_send_break(struct specialix_port *port, } -static inline int sx_set_serial_info(struct specialix_port *port, +static int sx_set_serial_info(struct specialix_port *port, struct serial_struct __user *newinfo) { struct serial_struct tmp; @@ -2015,7 +1894,7 @@ static inline int sx_set_serial_info(struct specialix_port *port, } -static inline int sx_get_serial_info(struct specialix_port *port, +static int sx_get_serial_info(struct specialix_port *port, struct serial_struct __user *retinfo) { struct serial_struct tmp; @@ -2112,7 +1991,7 @@ static void sx_throttle(struct tty_struct *tty) bp = port_Board(port); /* Use DTR instead of RTS ! */ - if (SX_CRTSCTS(tty)) + if (sx_crtscts(tty)) port->MSVR &= ~MSVR_DTR; else { /* Auch!!! I think the system shouldn't call this then. */ @@ -2158,7 +2037,7 @@ static void sx_unthrottle(struct tty_struct *tty) spin_lock_irqsave(&port->lock, flags); /* XXXX Use DTR INSTEAD???? */ - if (SX_CRTSCTS(tty)) + if (sx_crtscts(tty)) port->MSVR |= MSVR_DTR; /* Else clause: see remark in "sx_throttle"... */ spin_lock_irqsave(&bp->lock, flags); @@ -2381,11 +2260,11 @@ static int __init specialix_init(void) printk(KERN_INFO "sx: Specialix IO8+ driver v" VERSION ", (c) R.E.Wolff 1997/1998.\n"); printk(KERN_INFO "sx: derived from work (c) D.Gorodchanin 1994-1996.\n"); -#ifdef CONFIG_SPECIALIX_RTSCTS - printk(KERN_INFO "sx: DTR/RTS pin is always RTS.\n"); -#else - printk(KERN_INFO "sx: DTR/RTS pin is RTS when CRTSCTS is on.\n"); -#endif + if (sx_rtscts) + printk(KERN_INFO + "sx: DTR/RTS pin is RTS when CRTSCTS is on.\n"); + else + printk(KERN_INFO "sx: DTR/RTS pin is always RTS.\n"); for (i = 0; i < SX_NBOARD; i++) spin_lock_init(&sx_board[i].lock); @@ -2443,16 +2322,13 @@ static int __init specialix_init(void) } static int iobase[SX_NBOARD] = {0,}; - -static int irq [SX_NBOARD] = {0,}; +static int irq[SX_NBOARD] = {0,}; module_param_array(iobase, int, NULL, 0); module_param_array(irq, int, NULL, 0); module_param(sx_debug, int, 0); +module_param(sx_rtscts, int, 0); module_param(sx_rxfifo, int, 0); -#ifdef SPECIALIX_TIMER -module_param(sx_poll, int, 0); -#endif /* * You can setup up to 4 boards. @@ -2492,10 +2368,6 @@ static void __exit specialix_exit_module(void) for (i = 0; i < SX_NBOARD; i++) if (sx_board[i].flags & SX_BOARD_PRESENT) sx_release_io_range(&sx_board[i]); -#ifdef SPECIALIX_TIMER - del_timer_sync(&missed_irq_timer); -#endif - func_exit(); } -- cgit v1.2.3 From c3570acb53c885f8accb8c85eda195781d30632f Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 11 Jul 2008 15:17:38 -0700 Subject: e1000: delete non NAPI code from the driver Compile-tested only. Signed-off-by: Francois Romieu Signed-off-by: Jeff Kirsher Signed-off-by: Jeff Garzik --- Documentation/networking/e1000.txt | 14 +--- drivers/net/Kconfig | 14 ---- drivers/net/e1000/e1000.h | 10 +-- drivers/net/e1000/e1000_main.c | 151 ++++--------------------------------- 4 files changed, 16 insertions(+), 173 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt index 61b171cf5313..2df71861e578 100644 --- a/Documentation/networking/e1000.txt +++ b/Documentation/networking/e1000.txt @@ -513,21 +513,11 @@ Additional Configurations Intel(R) PRO/1000 PT Dual Port Server Connection Intel(R) PRO/1000 PT Dual Port Server Adapter Intel(R) PRO/1000 PF Dual Port Server Adapter - Intel(R) PRO/1000 PT Quad Port Server Adapter + Intel(R) PRO/1000 PT Quad Port Server Adapter NAPI ---- - NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled - or disabled based on the configuration of the kernel. To override - the default, use the following compile-time flags. - - To enable NAPI, compile the driver module, passing in a configuration option: - - make CFLAGS_EXTRA=-DE1000_NAPI install - - To disable NAPI, compile the driver module, passing in a configuration option: - - make CFLAGS_EXTRA=-DE1000_NO_NAPI install + NAPI (Rx polling mode) is enabled in the e1000 driver. See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 32b89b0975ae..fa533c27052a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1926,20 +1926,6 @@ config E1000 To compile this driver as a module, choose M here. The module will be called e1000. -config E1000_NAPI - bool "Use Rx Polling (NAPI)" - depends on E1000 - help - NAPI is a new driver API designed to reduce CPU and interrupt load - when the driver is receiving lots of packets from the card. It is - still somewhat experimental and thus not yet enabled by default. - - If your estimated Rx load is 10kpps or more, or if the card will be - deployed on potentially unfriendly networks (e.g. in a firewall), - then say Y here. - - If in doubt, say N. - config E1000_DISABLE_PACKET_SPLIT bool "Disable Packet Split for PCI express adapters" depends on E1000 diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 3aeba9305dfa..583b823f760c 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -253,9 +253,7 @@ struct e1000_adapter { u16 link_speed; u16 link_duplex; spinlock_t stats_lock; -#ifdef CONFIG_E1000_NAPI spinlock_t tx_queue_lock; -#endif unsigned int total_tx_bytes; unsigned int total_tx_packets; unsigned int total_rx_bytes; @@ -293,22 +291,16 @@ struct e1000_adapter { bool detect_tx_hung; /* RX */ -#ifdef CONFIG_E1000_NAPI bool (*clean_rx)(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do); -#else - bool (*clean_rx)(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); -#endif void (*alloc_rx_buf)(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int cleaned_count); struct e1000_rx_ring *rx_ring; /* One per active queue */ -#ifdef CONFIG_E1000_NAPI struct napi_struct napi; struct net_device *polling_netdev; /* One per active queue */ -#endif + int num_tx_queues; int num_rx_queues; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index b2c3e4cc0cb4..190352c6216e 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -31,12 +31,7 @@ char e1000_driver_name[] = "e1000"; static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; -#ifndef CONFIG_E1000_NAPI -#define DRIVERNAPI -#else -#define DRIVERNAPI "-NAPI" -#endif -#define DRV_VERSION "7.3.20-k2"DRIVERNAPI +#define DRV_VERSION "7.3.20-k3-NAPI" const char e1000_driver_version[] = DRV_VERSION; static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; @@ -138,7 +133,6 @@ static irqreturn_t e1000_intr(int irq, void *data); static irqreturn_t e1000_intr_msi(int irq, void *data); static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring); -#ifdef CONFIG_E1000_NAPI static int e1000_clean(struct napi_struct *napi, int budget); static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, @@ -146,12 +140,6 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do); -#else -static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); -static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring); -#endif static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int cleaned_count); @@ -512,9 +500,8 @@ int e1000_up(struct e1000_adapter *adapter) clear_bit(__E1000_DOWN, &adapter->flags); -#ifdef CONFIG_E1000_NAPI napi_enable(&adapter->napi); -#endif + e1000_irq_enable(adapter); /* fire a link change interrupt to start the watchdog */ @@ -602,9 +589,8 @@ void e1000_down(struct e1000_adapter *adapter) * reschedule our watchdog timer */ set_bit(__E1000_DOWN, &adapter->flags); -#ifdef CONFIG_E1000_NAPI napi_disable(&adapter->napi); -#endif + e1000_irq_disable(adapter); del_timer_sync(&adapter->tx_fifo_stall_timer); @@ -966,9 +952,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, e1000_set_ethtool_ops(netdev); netdev->tx_timeout = &e1000_tx_timeout; netdev->watchdog_timeo = 5 * HZ; -#ifdef CONFIG_E1000_NAPI netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); -#endif netdev->vlan_rx_register = e1000_vlan_rx_register; netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; @@ -1207,16 +1191,12 @@ err_eeprom: if (hw->flash_address) iounmap(hw->flash_address); err_flashmap: -#ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) dev_put(&adapter->polling_netdev[i]); -#endif kfree(adapter->tx_ring); kfree(adapter->rx_ring); -#ifdef CONFIG_E1000_NAPI kfree(adapter->polling_netdev); -#endif err_sw_init: iounmap(hw->hw_addr); err_ioremap: @@ -1244,9 +1224,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; -#ifdef CONFIG_E1000_NAPI int i; -#endif cancel_work_sync(&adapter->reset_task); @@ -1256,10 +1234,8 @@ static void __devexit e1000_remove(struct pci_dev *pdev) * would have already happened in close and is redundant. */ e1000_release_hw_control(adapter); -#ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) dev_put(&adapter->polling_netdev[i]); -#endif unregister_netdev(netdev); @@ -1268,9 +1244,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev) kfree(adapter->tx_ring); kfree(adapter->rx_ring); -#ifdef CONFIG_E1000_NAPI kfree(adapter->polling_netdev); -#endif iounmap(hw->hw_addr); if (hw->flash_address) @@ -1296,9 +1270,7 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter) struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; -#ifdef CONFIG_E1000_NAPI int i; -#endif /* PCI config space info */ @@ -1356,14 +1328,12 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter) return -ENOMEM; } -#ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) { adapter->polling_netdev[i].priv = adapter; dev_hold(&adapter->polling_netdev[i]); set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state); } spin_lock_init(&adapter->tx_queue_lock); -#endif /* Explicitly disable IRQ since the NIC can be in any state. */ e1000_irq_disable(adapter); @@ -1398,7 +1368,6 @@ static int __devinit e1000_alloc_queues(struct e1000_adapter *adapter) return -ENOMEM; } -#ifdef CONFIG_E1000_NAPI adapter->polling_netdev = kcalloc(adapter->num_rx_queues, sizeof(struct net_device), GFP_KERNEL); @@ -1407,7 +1376,6 @@ static int __devinit e1000_alloc_queues(struct e1000_adapter *adapter) kfree(adapter->rx_ring); return -ENOMEM; } -#endif return E1000_SUCCESS; } @@ -1472,9 +1440,7 @@ static int e1000_open(struct net_device *netdev) /* From here on the code is the same as e1000_up() */ clear_bit(__E1000_DOWN, &adapter->flags); -#ifdef CONFIG_E1000_NAPI napi_enable(&adapter->napi); -#endif e1000_irq_enable(adapter); @@ -2069,11 +2035,9 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) ctrl_ext = er32(CTRL_EXT); /* Reset delay timers after every interrupt */ ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; -#ifdef CONFIG_E1000_NAPI /* Auto-Mask interrupts upon ICR access */ ctrl_ext |= E1000_CTRL_EXT_IAME; ew32(IAM, 0xffffffff); -#endif ew32(CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(); } @@ -3777,9 +3741,6 @@ static irqreturn_t e1000_intr_msi(int irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; -#ifndef CONFIG_E1000_NAPI - int i; -#endif u32 icr = er32(ICR); /* in NAPI mode read ICR disables interrupts using IAM */ @@ -3800,7 +3761,6 @@ static irqreturn_t e1000_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } -#ifdef CONFIG_E1000_NAPI if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; @@ -3809,20 +3769,6 @@ static irqreturn_t e1000_intr_msi(int irq, void *data) __netif_rx_schedule(netdev, &adapter->napi); } else e1000_irq_enable(adapter); -#else - adapter->total_tx_bytes = 0; - adapter->total_rx_bytes = 0; - adapter->total_tx_packets = 0; - adapter->total_rx_packets = 0; - - for (i = 0; i < E1000_MAX_INTR; i++) - if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & - !e1000_clean_tx_irq(adapter, adapter->tx_ring))) - break; - - if (likely(adapter->itr_setting & 3)) - e1000_set_itr(adapter); -#endif return IRQ_HANDLED; } @@ -3839,13 +3785,10 @@ static irqreturn_t e1000_intr(int irq, void *data) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 rctl, icr = er32(ICR); -#ifndef CONFIG_E1000_NAPI - int i; -#endif + if (unlikely(!icr)) return IRQ_NONE; /* Not our interrupt */ -#ifdef CONFIG_E1000_NAPI /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is * not set, then the adapter didn't send an interrupt */ if (unlikely(hw->mac_type >= e1000_82571 && @@ -3854,7 +3797,6 @@ static irqreturn_t e1000_intr(int irq, void *data) /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No * need for the IMC write */ -#endif if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { hw->get_link_status = 1; @@ -3874,7 +3816,6 @@ static irqreturn_t e1000_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } -#ifdef CONFIG_E1000_NAPI if (unlikely(hw->mac_type < e1000_82571)) { /* disable interrupts, without the synchronize_irq bit */ ew32(IMC, ~0); @@ -3890,46 +3831,14 @@ static irqreturn_t e1000_intr(int irq, void *data) /* this really should not happen! if it does it is basically a * bug, but not a hard error, so enable ints and continue */ e1000_irq_enable(adapter); -#else - /* Writing IMC and IMS is needed for 82547. - * Due to Hub Link bus being occupied, an interrupt - * de-assertion message is not able to be sent. - * When an interrupt assertion message is generated later, - * two messages are re-ordered and sent out. - * That causes APIC to think 82547 is in de-assertion - * state, while 82547 is in assertion state, resulting - * in dead lock. Writing IMC forces 82547 into - * de-assertion state. - */ - if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) - ew32(IMC, ~0); - - adapter->total_tx_bytes = 0; - adapter->total_rx_bytes = 0; - adapter->total_tx_packets = 0; - adapter->total_rx_packets = 0; - - for (i = 0; i < E1000_MAX_INTR; i++) - if (unlikely(!adapter->clean_rx(adapter, adapter->rx_ring) & - !e1000_clean_tx_irq(adapter, adapter->tx_ring))) - break; - if (likely(adapter->itr_setting & 3)) - e1000_set_itr(adapter); - - if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) - e1000_irq_enable(adapter); - -#endif return IRQ_HANDLED; } -#ifdef CONFIG_E1000_NAPI /** * e1000_clean - NAPI Rx polling callback * @adapter: board private structure **/ - static int e1000_clean(struct napi_struct *napi, int budget) { struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi); @@ -3966,12 +3875,10 @@ static int e1000_clean(struct napi_struct *napi, int budget) return work_done; } -#endif /** * e1000_clean_tx_irq - Reclaim resources after transmit completes * @adapter: board private structure **/ - static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring) { @@ -3980,9 +3887,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_desc *tx_desc, *eop_desc; struct e1000_buffer *buffer_info; unsigned int i, eop; -#ifdef CONFIG_E1000_NAPI unsigned int count = 0; -#endif bool cleaned = false; unsigned int total_tx_bytes=0, total_tx_packets=0; @@ -4014,11 +3919,10 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, eop = tx_ring->buffer_info[i].next_to_watch; eop_desc = E1000_TX_DESC(*tx_ring, eop); -#ifdef CONFIG_E1000_NAPI #define E1000_TX_WEIGHT 64 /* weight of a sort for tx, to avoid endless transmit cleanup */ - if (count++ == E1000_TX_WEIGHT) break; -#endif + if (count++ == E1000_TX_WEIGHT) + break; } tx_ring->next_to_clean = i; @@ -4131,14 +4035,9 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, * e1000_clean_rx_irq - Send received data up the network stack; legacy * @adapter: board private structure **/ -#ifdef CONFIG_E1000_NAPI static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do) -#else -static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring) -#endif { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; @@ -4161,11 +4060,10 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, struct sk_buff *skb; u8 status; -#ifdef CONFIG_E1000_NAPI if (*work_done >= work_to_do) break; (*work_done)++; -#endif + status = rx_desc->status; skb = buffer_info->skb; buffer_info->skb = NULL; @@ -4251,7 +4149,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, le16_to_cpu(rx_desc->csum), skb); skb->protocol = eth_type_trans(skb, netdev); -#ifdef CONFIG_E1000_NAPI + if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) { vlan_hwaccel_receive_skb(skb, adapter->vlgrp, @@ -4259,15 +4157,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, } else { netif_receive_skb(skb); } -#else /* CONFIG_E1000_NAPI */ - if (unlikely(adapter->vlgrp && - (status & E1000_RXD_STAT_VP))) { - vlan_hwaccel_rx(skb, adapter->vlgrp, - le16_to_cpu(rx_desc->special)); - } else { - netif_rx(skb); - } -#endif /* CONFIG_E1000_NAPI */ + netdev->last_rx = jiffies; next_desc: @@ -4301,14 +4191,9 @@ next_desc: * @adapter: board private structure **/ -#ifdef CONFIG_E1000_NAPI static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do) -#else -static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, - struct e1000_rx_ring *rx_ring) -#endif { union e1000_rx_desc_packet_split *rx_desc, *next_rxd; struct net_device *netdev = adapter->netdev; @@ -4331,11 +4216,11 @@ static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, while (staterr & E1000_RXD_STAT_DD) { ps_page = &rx_ring->ps_page[i]; ps_page_dma = &rx_ring->ps_page_dma[i]; -#ifdef CONFIG_E1000_NAPI + if (unlikely(*work_done >= work_to_do)) break; (*work_done)++; -#endif + skb = buffer_info->skb; /* in the packet split case this is header only */ @@ -4437,21 +4322,14 @@ copydone: if (likely(rx_desc->wb.upper.header_status & cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))) adapter->rx_hdr_split++; -#ifdef CONFIG_E1000_NAPI + if (unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { vlan_hwaccel_receive_skb(skb, adapter->vlgrp, le16_to_cpu(rx_desc->wb.middle.vlan)); } else { netif_receive_skb(skb); } -#else /* CONFIG_E1000_NAPI */ - if (unlikely(adapter->vlgrp && (staterr & E1000_RXD_STAT_VP))) { - vlan_hwaccel_rx(skb, adapter->vlgrp, - le16_to_cpu(rx_desc->wb.middle.vlan)); - } else { - netif_rx(skb); - } -#endif /* CONFIG_E1000_NAPI */ + netdev->last_rx = jiffies; next_desc: @@ -5218,9 +5096,6 @@ static void e1000_netpoll(struct net_device *netdev) disable_irq(adapter->pdev->irq); e1000_intr(adapter->pdev->irq, netdev); -#ifndef CONFIG_E1000_NAPI - adapter->clean_rx(adapter, adapter->rx_ring); -#endif enable_irq(adapter->pdev->irq); } #endif -- cgit v1.2.3 From 78305de2f99e9f43ab860dd95bb430b20e26c695 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 23 Apr 2008 07:20:41 -0400 Subject: Remove mention of semaphores from kernel-locking Since the consensus seems to be to eliminate semaphores where possible, we shouldn't be educating people about how to use them as locks. Use mutexes instead. Semaphores should be described in a separate document if we end up keeping them. Signed-off-by: Matthew Wilcox Acked-by: Rusty Russell --- Documentation/DocBook/kernel-locking.tmpl | 57 +++++++++++++------------------ 1 file changed, 24 insertions(+), 33 deletions(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 2510763295d0..084f6ad7b7a0 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -219,10 +219,10 @@ - Three Main Types of Kernel Locks: Spinlocks, Mutexes and Semaphores + Two Main Types of Kernel Locks: Spinlocks and Mutexes - There are three main types of kernel locks. The fundamental type + There are two main types of kernel locks. The fundamental type is the spinlock (include/asm/spinlock.h), which is a very simple single-holder lock: if you can't get the @@ -239,14 +239,6 @@ can't sleep (see ), and so have to use a spinlock instead. - - The third type is a semaphore - (include/linux/semaphore.h): it - can have more than one holder at any time (the number decided at - initialization time), although it is most commonly used as a - single-holder lock (a mutex). If you can't get a semaphore, your - task will be suspended and later on woken up - just like for mutexes. - Neither type of lock is recursive: see . @@ -278,7 +270,7 @@ - Semaphores still exist, because they are required for + Mutexes still exist, because they are required for synchronization between user contexts, as we will see below. @@ -289,18 +281,17 @@ If you have a data structure which is only ever accessed from - user context, then you can use a simple semaphore - (linux/linux/semaphore.h) to protect it. This - is the most trivial case: you initialize the semaphore to the number - of resources available (usually 1), and call - down_interruptible() to grab the semaphore, and - up() to release it. There is also a - down(), which should be avoided, because it + user context, then you can use a simple mutex + (include/linux/mutex.h) to protect it. This + is the most trivial case: you initialize the mutex. Then you can + call mutex_lock_interruptible() to grab the mutex, + and mutex_unlock() to release it. There is also a + mutex_lock(), which should be avoided, because it will not return if a signal is received. - Example: linux/net/core/netfilter.c allows + Example: net/netfilter/nf_sockopt.c allows registration of new setsockopt() and getsockopt() calls, with nf_register_sockopt(). Registration and @@ -515,7 +506,7 @@ If you are in a process context (any syscall) and want to - lock other process out, use a semaphore. You can take a semaphore + lock other process out, use a mutex. You can take a mutex and sleep (copy_from_user*( or kmalloc(x,GFP_KERNEL)). @@ -662,7 +653,7 @@ SLBH SLBH SLBH -DI +MLI None @@ -692,8 +683,8 @@ spin_lock_bh -DI -down_interruptible +MLI +mutex_lock_interruptible @@ -1310,7 +1301,7 @@ as Alan Cox says, Lock data, not code. There is a coding bug where a piece of code tries to grab a spinlock twice: it will spin forever, waiting for the lock to - be released (spinlocks, rwlocks and semaphores are not + be released (spinlocks, rwlocks and mutexes are not recursive in Linux). This is trivial to diagnose: not a stay-up-five-nights-talk-to-fluffy-code-bunnies kind of problem. @@ -1335,7 +1326,7 @@ as Alan Cox says, Lock data, not code. This complete lockup is easy to diagnose: on SMP boxes the - watchdog timer or compiling with DEBUG_SPINLOCKS set + watchdog timer or compiling with DEBUG_SPINLOCK set (include/linux/spinlock.h) will show this up immediately when it happens. @@ -1558,7 +1549,7 @@ the amount of locking which needs to be done. Read/Write Lock Variants - Both spinlocks and semaphores have read/write variants: + Both spinlocks and mutexes have read/write variants: rwlock_t and struct rw_semaphore. These divide users into two classes: the readers and the writers. If you are only reading the data, you can get a read lock, but to write to @@ -1681,7 +1672,7 @@ the amount of locking which needs to be done. #include <linux/slab.h> #include <linux/string.h> +#include <linux/rcupdate.h> - #include <linux/semaphore.h> + #include <linux/mutex.h> #include <asm/errno.h> struct object @@ -1913,7 +1904,7 @@ machines due to caching. - put_user() + put_user() @@ -1927,13 +1918,13 @@ machines due to caching. - down_interruptible() and - down() + mutex_lock_interruptible() and + mutex_lock() - There is a down_trylock() which can be + There is a mutex_trylock() which can be used inside interrupt context, as it will not sleep. - up() will also never sleep. + mutex_unlock() will also never sleep. @@ -2023,7 +2014,7 @@ machines due to caching. Prior to 2.5, or when CONFIG_PREEMPT is unset, processes in user context inside the kernel would not - preempt each other (ie. you had that CPU until you have it up, + preempt each other (ie. you had that CPU until you gave it up, except for interrupts). With the addition of CONFIG_PREEMPT in 2.5.4, this changed: when in user context, higher priority tasks can "cut in": spinlocks -- cgit v1.2.3 From 2351ec533ed0dd56052ab96988d2161d5ecc8ed9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 24 Jul 2008 08:09:32 -0400 Subject: Remove asm/semaphore.h All users have now been converted to linux/semaphore.h and we don't need to keep these files around any longer. Signed-off-by: Matthew Wilcox --- Documentation/feature-removal-schedule.txt | 8 -------- include/asm-alpha/semaphore.h | 1 - include/asm-arm/semaphore.h | 1 - include/asm-avr32/semaphore.h | 1 - include/asm-blackfin/semaphore.h | 1 - include/asm-cris/semaphore.h | 1 - include/asm-frv/semaphore.h | 1 - include/asm-h8300/semaphore.h | 1 - include/asm-ia64/semaphore.h | 1 - include/asm-m32r/semaphore.h | 1 - include/asm-m68k/semaphore.h | 1 - include/asm-m68knommu/semaphore.h | 1 - include/asm-mips/semaphore.h | 1 - include/asm-mn10300/semaphore.h | 1 - include/asm-parisc/semaphore.h | 1 - include/asm-powerpc/semaphore.h | 1 - include/asm-s390/semaphore.h | 1 - include/asm-sh/semaphore.h | 1 - include/asm-sparc/semaphore.h | 1 - include/asm-sparc64/semaphore.h | 1 - include/asm-um/semaphore.h | 1 - include/asm-v850/semaphore.h | 1 - include/asm-x86/semaphore.h | 1 - include/asm-xtensa/semaphore.h | 1 - 24 files changed, 31 deletions(-) delete mode 100644 include/asm-alpha/semaphore.h delete mode 100644 include/asm-arm/semaphore.h delete mode 100644 include/asm-avr32/semaphore.h delete mode 100644 include/asm-blackfin/semaphore.h delete mode 100644 include/asm-cris/semaphore.h delete mode 100644 include/asm-frv/semaphore.h delete mode 100644 include/asm-h8300/semaphore.h delete mode 100644 include/asm-ia64/semaphore.h delete mode 100644 include/asm-m32r/semaphore.h delete mode 100644 include/asm-m68k/semaphore.h delete mode 100644 include/asm-m68knommu/semaphore.h delete mode 100644 include/asm-mips/semaphore.h delete mode 100644 include/asm-mn10300/semaphore.h delete mode 100644 include/asm-parisc/semaphore.h delete mode 100644 include/asm-powerpc/semaphore.h delete mode 100644 include/asm-s390/semaphore.h delete mode 100644 include/asm-sh/semaphore.h delete mode 100644 include/asm-sparc/semaphore.h delete mode 100644 include/asm-sparc64/semaphore.h delete mode 100644 include/asm-um/semaphore.h delete mode 100644 include/asm-v850/semaphore.h delete mode 100644 include/asm-x86/semaphore.h delete mode 100644 include/asm-xtensa/semaphore.h (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 9f73587219e8..09c4a1efb8e3 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -300,14 +300,6 @@ Who: ocfs2-devel@oss.oracle.com --------------------------- -What: asm/semaphore.h -When: 2.6.26 -Why: Implementation became generic; users should now include - linux/semaphore.h instead. -Who: Matthew Wilcox - ---------------------------- - What: SCTP_GET_PEER_ADDRS_NUM_OLD, SCTP_GET_PEER_ADDRS_OLD, SCTP_GET_LOCAL_ADDRS_NUM_OLD, SCTP_GET_LOCAL_ADDRS_OLD When: June 2009 diff --git a/include/asm-alpha/semaphore.h b/include/asm-alpha/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-alpha/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-arm/semaphore.h b/include/asm-arm/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-arm/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-avr32/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-blackfin/semaphore.h b/include/asm-blackfin/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-blackfin/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-cris/semaphore.h b/include/asm-cris/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-cris/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-frv/semaphore.h b/include/asm-frv/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-frv/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-h8300/semaphore.h b/include/asm-h8300/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-h8300/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ia64/semaphore.h b/include/asm-ia64/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-ia64/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-m32r/semaphore.h b/include/asm-m32r/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-m32r/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-m68k/semaphore.h b/include/asm-m68k/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-m68k/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-m68knommu/semaphore.h b/include/asm-m68knommu/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-m68knommu/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-mips/semaphore.h b/include/asm-mips/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-mips/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-mn10300/semaphore.h b/include/asm-mn10300/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-mn10300/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-parisc/semaphore.h b/include/asm-parisc/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-parisc/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-powerpc/semaphore.h b/include/asm-powerpc/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-powerpc/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-s390/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-sh/semaphore.h b/include/asm-sh/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-sh/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-sparc/semaphore.h b/include/asm-sparc/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-sparc/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-sparc64/semaphore.h b/include/asm-sparc64/semaphore.h deleted file mode 100644 index 39362afde5fe..000000000000 --- a/include/asm-sparc64/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-um/semaphore.h b/include/asm-um/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-um/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-v850/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-x86/semaphore.h b/include/asm-x86/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-x86/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h deleted file mode 100644 index d9b2034ed1d2..000000000000 --- a/include/asm-xtensa/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v1.2.3 From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 23 Jul 2008 21:26:49 -0700 Subject: mm: add a basic debugging framework for memory initialisation Boot initialisation is very complex, with significant numbers of architecture-specific routines, hooks and code ordering. While significant amounts of the initialisation is architecture-independent, it trusts the data received from the architecture layer. This is a mistake, and has resulted in a number of difficult-to-diagnose bugs. This patchset adds some validation and tracing to memory initialisation. It also introduces a few basic defensive measures. The validation code can be explicitly disabled for embedded systems. This patch: Add additional debugging and verification code for memory initialisation. Once enabled, the verification checks are always run and when required additional debugging information may be outputted via a mminit_loglevel= command-line parameter. The verification code is placed in a new file mm/mm_init.c. Ideally other mm initialisation code will be moved here over time. Signed-off-by: Mel Gorman Cc: Christoph Lameter Cc: Andy Whitcroft Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 8 ++++++++ lib/Kconfig.debug | 12 ++++++++++++ mm/Makefile | 1 + mm/internal.h | 27 +++++++++++++++++++++++++++ mm/mm_init.c | 18 ++++++++++++++++++ mm/page_alloc.c | 22 +++++++++++++--------- 6 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 mm/mm_init.c (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 47e7d8794fc6..5e20ccb5a736 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1225,6 +1225,14 @@ and is between 256 and 4096 characters. It is defined in the file mga= [HW,DRM] + mminit_loglevel= + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this + parameter allows control of the logging verbosity for + the additional memory initialisation checks. A value + of 0 disables mminit logging and a level of 4 will + log everything. Information is printed at KERN_DEBUG + so loglevel=8 may also need to be specified. + mousedev.tap_time= [MOUSE] Maximum time between finger touching and leaving touchpad surface for touch to be considered diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 882c51048993..e1d4764435ed 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -505,6 +505,18 @@ config DEBUG_WRITECOUNT If unsure, say N. +config DEBUG_MEMORY_INIT + bool "Debug memory initialisation" if EMBEDDED + default !EMBEDDED + help + Enable this for additional checks during memory initialisation. + The sanity checks verify aspects of the VM such as the memory model + and other information provided by the architecture. Verbose + information will be printed at KERN_DEBUG loglevel depending + on the mminit_loglevel= command-line option. + + If unsure, say Y + config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL diff --git a/mm/Makefile b/mm/Makefile index 18c143b3c46c..4bbc8f094ff0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o +obj-$(CONFIG_DEBUG_MEMORY_INIT) += mm_init.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/internal.h b/mm/internal.h index 0034e947e4bc..a7ee05253294 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -59,4 +59,31 @@ static inline unsigned long page_order(struct page *page) #define __paginginit __init #endif +/* Memory initialisation debug and verification */ +enum mminit_level { + MMINIT_WARNING, + MMINIT_VERIFY, + MMINIT_TRACE +}; + +#ifdef CONFIG_DEBUG_MEMORY_INIT + +extern int mminit_loglevel; + +#define mminit_dprintk(level, prefix, fmt, arg...) \ +do { \ + if (level < mminit_loglevel) { \ + printk(level <= MMINIT_WARNING ? KERN_WARNING : KERN_DEBUG); \ + printk(KERN_CONT "mminit::" prefix " " fmt, ##arg); \ + } \ +} while (0) + +#else + +static inline void mminit_dprintk(enum mminit_level level, + const char *prefix, const char *fmt, ...) +{ +} + +#endif /* CONFIG_DEBUG_MEMORY_INIT */ #endif diff --git a/mm/mm_init.c b/mm/mm_init.c new file mode 100644 index 000000000000..c01d8dfec817 --- /dev/null +++ b/mm/mm_init.c @@ -0,0 +1,18 @@ +/* + * mm_init.c - Memory initialisation verification and debugging + * + * Copyright 2008 IBM Corporation, 2008 + * Author Mel Gorman + * + */ +#include +#include + +int __meminitdata mminit_loglevel; + +static __init int set_mminit_loglevel(char *str) +{ + get_option(&str, &mminit_loglevel); + return 0; +} +early_param("mminit_loglevel", set_mminit_loglevel); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 79ac4afc908c..0908352ba727 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2975,7 +2975,8 @@ void __init sparse_memory_present_with_active_regions(int nid) void __init push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn) { - printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", + mminit_dprintk(MMINIT_TRACE, "zoneboundary", + "Entering push_node_boundaries(%u, %lu, %lu)\n", nid, start_pfn, end_pfn); /* Initialise the boundary for this node if necessary */ @@ -2993,7 +2994,8 @@ void __init push_node_boundaries(unsigned int nid, static void __meminit account_node_boundary(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) { - printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", + mminit_dprintk(MMINIT_TRACE, "zoneboundary", + "Entering account_node_boundary(%u, %lu, %lu)\n", nid, *start_pfn, *end_pfn); /* Return if boundary information has not been provided */ @@ -3368,8 +3370,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; if (realsize >= memmap_pages) { realsize -= memmap_pages; - printk(KERN_DEBUG - " %s zone: %lu pages used for memmap\n", + mminit_dprintk(MMINIT_TRACE, "memmap_init", + "%s zone: %lu pages used for memmap\n", zone_names[j], memmap_pages); } else printk(KERN_WARNING @@ -3379,7 +3381,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, /* Account for reserved pages */ if (j == 0 && realsize > dma_reserve) { realsize -= dma_reserve; - printk(KERN_DEBUG " %s zone: %lu pages reserved\n", + mminit_dprintk(MMINIT_TRACE, "memmap_init", + "%s zone: %lu pages reserved\n", zone_names[0], dma_reserve); } @@ -3520,10 +3523,11 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, { int i; - printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) " - "%d entries of %d used\n", - nid, start_pfn, end_pfn, - nr_nodemap_entries, MAX_ACTIVE_REGIONS); + mminit_dprintk(MMINIT_TRACE, "memory_register", + "Entering add_active_range(%d, %#lx, %#lx) " + "%d entries of %d used\n", + nid, start_pfn, end_pfn, + nr_nodemap_entries, MAX_ACTIVE_REGIONS); /* Merge with existing active regions if possible */ for (i = 0; i < nr_nodemap_entries; i++) { -- cgit v1.2.3 From 28b2ee20c7cba812b6f2ccf6d722cf86d00a84dc Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 23 Jul 2008 21:27:05 -0700 Subject: access_process_vm device memory infrastructure In order to be able to debug things like the X server and programs using the PPC Cell SPUs, the debugger needs to be able to access device memory through ptrace and /proc/pid/mem. This patch: Add the generic_access_phys access function and put the hooks in place to allow access_process_vm to access device or PPC Cell SPU memory. [riel@redhat.com: Add documentation for the vm_ops->access function] Signed-off-by: Rik van Riel Signed-off-by: Benjamin Herrensmidt Cc: Dave Airlie Cc: Hugh Dickins Cc: Paul Mackerras Cc: Arnd Bergmann Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 7 ++ arch/Kconfig | 3 + arch/x86/Kconfig | 1 + arch/x86/mm/ioremap.c | 8 +++ include/asm-x86/io_32.h | 2 + include/asm-x86/io_64.h | 2 + include/linux/mm.h | 8 +++ mm/memory.c | 131 ++++++++++++++++++++++++++++++++------ 8 files changed, 144 insertions(+), 18 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8b22d7d8b991..680fb566b928 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -510,6 +510,7 @@ prototypes: void (*close)(struct vm_area_struct*); int (*fault)(struct vm_area_struct*, struct vm_fault *); int (*page_mkwrite)(struct vm_area_struct *, struct page *); + int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); locking rules: BKL mmap_sem PageLocked(page) @@ -517,6 +518,7 @@ open: no yes close: no yes fault: no yes page_mkwrite: no yes no +access: no yes ->page_mkwrite() is called when a previously read-only page is about to become writeable. The file system is responsible for @@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be within i_size. The page mapping should also be checked that it is not NULL. + ->access() is called when get_user_pages() fails in +acces_process_vm(), typically used to debug a process through +/proc/pid/mem or ptrace. This function is needed only for +VM_IO | VM_PFNMAP VMAs. + ================================================================================ Dubious stuff diff --git a/arch/Kconfig b/arch/Kconfig index 4d5ebbc1e72b..6093c0be58b0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -31,6 +31,9 @@ config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES +config HAVE_IOREMAP_PROT + def_bool n + config HAVE_KPROBES def_bool n diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 03980cb04291..b2ddfcf01728 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE + select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_DYNAMIC_FTRACE diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24c1d3c30186..016f335bbeea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, return (void __iomem *)ret; } +void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, + unsigned long prot_val) +{ + return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_prot); + /** * iounmap - Free a IO remapping * @addr: virtual address from ioremap_* diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 4df44ed54077..e876d89ac156 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address) */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); /* * The default ioremap() behavior is non-cached: diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index ddd8058a5026..22995c5c5adc 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size); */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); /* * The default ioremap() behavior is non-cached: diff --git a/include/linux/mm.h b/include/linux/mm.h index eb815cfc1b35..5c7f8f64f70e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -170,6 +170,12 @@ struct vm_operations_struct { /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); + + /* called by access_process_vm when get_user_pages() fails, typically + * for use by special VMAs that can switch between memory and hardware + */ + int (*access)(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); #ifdef CONFIG_NUMA /* * set_policy() op must add a reference to any non-NULL @new mempolicy @@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); +int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); static inline void unmap_shared_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen) diff --git a/mm/memory.c b/mm/memory.c index 46dbed4b7446..87350321e66f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr) #endif /* __HAVE_ARCH_GATE_AREA */ +#ifdef CONFIG_HAVE_IOREMAP_PROT +static resource_size_t follow_phys(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, + unsigned long *prot) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + spinlock_t *ptl; + resource_size_t phys_addr = 0; + struct mm_struct *mm = vma->vm_mm; + + VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); + + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto no_page_table; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto no_page_table; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto no_page_table; + + /* We cannot handle huge page PFN maps. Luckily they don't exist. */ + if (pmd_huge(*pmd)) + goto no_page_table; + + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + if (!ptep) + goto out; + + pte = *ptep; + if (!pte_present(pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) + goto unlock; + phys_addr = pte_pfn(pte); + phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ + + *prot = pgprot_val(pte_pgprot(pte)); + +unlock: + pte_unmap_unlock(ptep, ptl); +out: + return phys_addr; +no_page_table: + return 0; +} + +int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + resource_size_t phys_addr; + unsigned long prot = 0; + void *maddr; + int offset = addr & (PAGE_SIZE-1); + + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return -EINVAL; + + phys_addr = follow_phys(vma, addr, write, &prot); + + if (!phys_addr) + return -EINVAL; + + maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); + if (write) + memcpy_toio(maddr + offset, buf, len); + else + memcpy_fromio(buf, maddr + offset, len); + iounmap(maddr); + + return len; +} +#endif + /* * Access another process' address space. * Source/target buffer must be kernel space, @@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in { struct mm_struct *mm; struct vm_area_struct *vma; - struct page *page; void *old_buf = buf; mm = get_task_mm(tsk); @@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in while (len) { int bytes, ret, offset; void *maddr; + struct page *page = NULL; ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); - if (ret <= 0) - break; - - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; - - maddr = kmap(page); - if (write) { - copy_to_user_page(vma, page, addr, - maddr + offset, buf, bytes); - set_page_dirty_lock(page); + if (ret <= 0) { + /* + * Check if this is a VM_IO | VM_PFNMAP VMA, which + * we can access using slightly different code. + */ +#ifdef CONFIG_HAVE_IOREMAP_PROT + vma = find_vma(mm, addr); + if (!vma) + break; + if (vma->vm_ops && vma->vm_ops->access) + ret = vma->vm_ops->access(vma, addr, buf, + len, write); + if (ret <= 0) +#endif + break; + bytes = ret; } else { - copy_from_user_page(vma, page, addr, - buf, maddr + offset, bytes); + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + page_cache_release(page); } - kunmap(page); - page_cache_release(page); len -= bytes; buf += bytes; addr += bytes; -- cgit v1.2.3 From a47a126ad5ea072aca3e611ed8f8dc6adad24bab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Jul 2008 21:27:38 -0700 Subject: vmallocinfo: add NUMA information Christoph recently added /proc/vmallocinfo file to get information about vmalloc allocations. This patch adds NUMA specific information, giving number of pages allocated on each memory node. This should help to check that vmalloc() is able to respect NUMA policies. Example of output on a four nodes machine (one cpu per node) 1) network hash tables are evenly spreaded on four nodes (OK) (Same point for inodes and dentries hash tables) 2) iptables tables (x_tables) are correctly allocated on each cpu node (OK). 3) sys_swapon() allocates its memory from one node only. 4) each loaded module is using memory on one node. Sysadmins could tune their setup to change points 3) and 4) if necessary. grep "pages=" /proc/vmallocinfo 0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204/0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128 0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 0xffffc2000031a000-0xffffc2000031d000 12288 alloc_large_system_hash+0x204/0x2c0 pages=2 vmalloc N1=1 N2=1 0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e/0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3 0xffffc2000033e000-0xffffc20000341000 12288 sys_swapon+0x640/0xac0 pages=2 vmalloc N0=2 0xffffc20000341000-0xffffc20000344000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N0=2 0xffffc20000344000-0xffffc20000347000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N1=2 0xffffc20000347000-0xffffc2000034a000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N2=2 0xffffc2000034a000-0xffffc2000034d000 12288 xt_alloc_table_info+0xfe/0x130 [x_tables] pages=2 vmalloc N3=2 0xffffc20004381000-0xffffc20004402000 528384 alloc_large_system_hash+0x204/0x2c0 pages=128 vmalloc N0=32 N1=32 N2=32 N3=32 0xffffc20004402000-0xffffc20004803000 4198400 alloc_large_system_hash+0x204/0x2c0 pages=1024 vmalloc vpages N0=256 N1=256 N2=256 N3=256 0xffffc20004803000-0xffffc20004904000 1052672 alloc_large_system_hash+0x204/0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 0xffffc20004904000-0xffffc20004bec000 3047424 sys_swapon+0x640/0xac0 pages=743 vmalloc vpages N0=743 0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 pages=14 vmalloc N1=14 0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 pages=4 vmalloc N0=4 0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N0=2 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N1=10 0xffffffffa0022000-0xffffffffa0028000 24576 sys_init_module+0xc27/0x1d00 pages=5 vmalloc N3=5 0xffffffffa0028000-0xffffffffa0050000 163840 sys_init_module+0xc27/0x1d00 pages=39 vmalloc N1=39 0xffffffffa0050000-0xffffffffa0052000 8192 sys_init_module+0xc27/0x1d00 pages=1 vmalloc N1=1 0xffffffffa0052000-0xffffffffa0056000 16384 sys_init_module+0xc27/0x1d00 pages=3 vmalloc N1=3 0xffffffffa0056000-0xffffffffa0081000 176128 sys_init_module+0xc27/0x1d00 pages=42 vmalloc N3=42 0xffffffffa0081000-0xffffffffa00ae000 184320 sys_init_module+0xc27/0x1d00 pages=44 vmalloc N3=44 0xffffffffa00ae000-0xffffffffa00b1000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2 0xffffffffa00b1000-0xffffffffa00b9000 32768 sys_init_module+0xc27/0x1d00 pages=7 vmalloc N0=7 0xffffffffa00b9000-0xffffffffa00c4000 45056 sys_init_module+0xc27/0x1d00 pages=10 vmalloc N3=10 0xffffffffa00c6000-0xffffffffa00e0000 106496 sys_init_module+0xc27/0x1d00 pages=25 vmalloc N2=25 0xffffffffa00e0000-0xffffffffa00f1000 69632 sys_init_module+0xc27/0x1d00 pages=16 vmalloc N2=16 0xffffffffa00f1000-0xffffffffa00f4000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2 0xffffffffa00f4000-0xffffffffa00f7000 12288 sys_init_module+0xc27/0x1d00 pages=2 vmalloc N3=2 [akpm@linux-foundation.org: fix comment] Signed-off-by: Eric Dumazet Cc: Christoph Lameter Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 44 ++++++++++++++++++++++++++++++++++++++ fs/proc/proc_misc.c | 15 +++++++++++-- mm/vmalloc.c | 20 +++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 7f268f327d75..8c6384bdfed4 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -296,6 +296,7 @@ Table 1-4: Kernel info in /proc uptime System uptime version Kernel version video bttv info of video resources (2.4) + vmallocinfo Show vmalloced areas .............................................................................. You can, for example, check which interrupts are currently in use and what @@ -557,6 +558,49 @@ VmallocTotal: total size of vmalloc memory area VmallocUsed: amount of vmalloc area which is used VmallocChunk: largest contigious block of vmalloc area which is free +.............................................................................. + +vmallocinfo: + +Provides information about vmalloced/vmaped areas. One line per area, +containing the virtual address range of the area, size in bytes, +caller information of the creator, and optional information depending +on the kind of area : + + pages=nr number of pages + phys=addr if a physical address was specified + ioremap I/O mapping (ioremap() and friends) + vmalloc vmalloc() area + vmap vmap()ed pages + user VM_USERMAP area + vpages buffer for pages pointers was vmalloced (huge area) + N=nr (Only on NUMA kernels) + Number of pages allocated on memory node + +> cat /proc/vmallocinfo +0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ... + /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128 +0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ... + /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 +0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f... + phys=7fee8000 ioremap +0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f... + phys=7fee7000 ioremap +0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210 +0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ... + /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3 +0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ... + pages=2 vmalloc N1=2 +0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ... + /0x130 [x_tables] pages=4 vmalloc N0=4 +0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ... + pages=14 vmalloc N2=14 +0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ... + pages=4 vmalloc N1=4 +0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ... + pages=2 vmalloc N1=2 +0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... + pages=10 vmalloc N0=10 1.3 IDE devices in /proc/ide ---------------------------- diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index b14f43d25e9e..ded969862960 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -464,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = { #ifdef CONFIG_MMU static int vmalloc_open(struct inode *inode, struct file *file) { - return seq_open(file, &vmalloc_op); + unsigned int *ptr = NULL; + int ret; + + if (NUMA_BUILD) + ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + ret = seq_open(file, &vmalloc_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ptr; + } else + kfree(ptr); + return ret; } static const struct file_operations proc_vmalloc_operations = { .open = vmalloc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6e45b0f3d125..35f293816294 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -931,6 +931,25 @@ static void s_stop(struct seq_file *m, void *p) read_unlock(&vmlist_lock); } +static void show_numa_info(struct seq_file *m, struct vm_struct *v) +{ + if (NUMA_BUILD) { + unsigned int nr, *counters = m->private; + + if (!counters) + return; + + memset(counters, 0, nr_node_ids * sizeof(unsigned int)); + + for (nr = 0; nr < v->nr_pages; nr++) + counters[page_to_nid(v->pages[nr])]++; + + for_each_node_state(nr, N_HIGH_MEMORY) + if (counters[nr]) + seq_printf(m, " N%u=%u", nr, counters[nr]); + } +} + static int s_show(struct seq_file *m, void *p) { struct vm_struct *v = p; @@ -967,6 +986,7 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_VPAGES) seq_printf(m, " vpages"); + show_numa_info(m, v); seq_putc(m, '\n'); return 0; } -- cgit v1.2.3 From ff7ea79cf7c3a481851bd4b2185fdeb6ce4afa29 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:39 -0700 Subject: mm: create /sys/kernel/mm Add a kobject to create /sys/kernel/mm when sysfs is mounted. The kobject will exist regardless. This will allow for the hugepage related sysfs directories to exist under the mm "subsystem" directory. Add an ABI file appropriately. [kosaki.motohiro@jp.fujitsu.com: fix build] Signed-off-by: Nishanth Aravamudan Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-kernel-mm | 6 ++++++ include/linux/kobject.h | 2 ++ mm/mm_init.c | 16 ++++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-mm b/Documentation/ABI/testing/sysfs-kernel-mm new file mode 100644 index 000000000000..190d523ac159 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm @@ -0,0 +1,6 @@ +What: /sys/kernel/mm +Date: July 2008 +Contact: Nishanth Aravamudan , VM maintainers +Description: + /sys/kernel/mm/ should contain any and all VM + related information in /sys/kernel/. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 60f0d418ae32..5437ac0276e2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -186,6 +186,8 @@ extern struct kobject *kset_find_obj(struct kset *, const char *); /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; +/* The global /sys/kernel/mm/ kobject for people to chain off of */ +extern struct kobject *mm_kobj; /* The global /sys/hypervisor/ kobject for people to chain off of */ extern struct kobject *hypervisor_kobj; /* The global /sys/power/ kobject for people to chain off of */ diff --git a/mm/mm_init.c b/mm/mm_init.c index eaf0d3b47099..c6af41ea9994 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -7,6 +7,8 @@ */ #include #include +#include +#include #include "internal.h" #ifdef CONFIG_DEBUG_MEMORY_INIT @@ -134,3 +136,17 @@ static __init int set_mminit_loglevel(char *str) } early_param("mminit_loglevel", set_mminit_loglevel); #endif /* CONFIG_DEBUG_MEMORY_INIT */ + +struct kobject *mm_kobj; +EXPORT_SYMBOL_GPL(mm_kobj); + +static int __init mm_sysfs_init(void) +{ + mm_kobj = kobject_create_and_add("mm", kernel_kobj); + if (!mm_kobj) + return -ENOMEM; + + return 0; +} + +__initcall(mm_sysfs_init); -- cgit v1.2.3 From a3437870160cf2caaac6bdd76c7377a5a4145a8c Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 23 Jul 2008 21:27:44 -0700 Subject: hugetlb: new sysfs interface Provide new hugepages user APIs that are more suited to multiple hstates in sysfs. There is a new directory, /sys/kernel/hugepages. Underneath that directory there will be a directory per-supported hugepage size, e.g.: /sys/kernel/hugepages/hugepages-64kB /sys/kernel/hugepages/hugepages-16384kB /sys/kernel/hugepages/hugepages-16777216kB corresponding to 64k, 16m and 16g respectively. Within each hugepages-size directory there are a number of files, corresponding to the tracked counters in the hstate, e.g.: /sys/kernel/hugepages/hugepages-64/nr_hugepages /sys/kernel/hugepages/hugepages-64/nr_overcommit_hugepages /sys/kernel/hugepages/hugepages-64/free_hugepages /sys/kernel/hugepages/hugepages-64/resv_hugepages /sys/kernel/hugepages/hugepages-64/surplus_hugepages Of these files, the first two are read-write and the latter three are read-only. The size of the hugepage being manipulated is trivially deducible from the enclosing directory and is always expressed in kB (to match meminfo). [dave@linux.vnet.ibm.com: fix build] [nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel] [nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency] Acked-by: Greg Kroah-Hartman Signed-off-by: Nishanth Aravamudan Signed-off-by: Nick Piggin Cc: Dave Hansen Signed-off-by: Nishanth Aravamudan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../ABI/testing/sysfs-kernel-mm-hugepages | 15 ++ Documentation/vm/hugetlbpage.txt | 23 ++ include/linux/hugetlb.h | 2 + mm/hugetlb.c | 288 ++++++++++++++++----- 4 files changed, 262 insertions(+), 66 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-hugepages (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages new file mode 100644 index 000000000000..e21c00571cf4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -0,0 +1,15 @@ +What: /sys/kernel/mm/hugepages/ +Date: June 2008 +Contact: Nishanth Aravamudan , hugetlb maintainers +Description: + /sys/kernel/mm/hugepages/ contains a number of subdirectories + of the form hugepages-kB, where is the page size + of the hugepages supported by the kernel/CPU combination. + + Under these directories are a number of files: + nr_hugepages + nr_overcommit_hugepages + free_hugepages + surplus_hugepages + resv_hugepages + See Documentation/vm/hugetlbpage.txt for details. diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index 3102b81bef88..8a5b5763f0fe 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt @@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be allowed on the system until one of the two sysctls are increased sufficiently, or the surplus huge pages go out of use and are freed. +With support for multiple hugepage pools at run-time available, much of +the hugepage userspace interface has been duplicated in sysfs. The above +information applies to the default hugepage size (which will be +controlled by the proc interfaces for backwards compatibility). The root +hugepage control directory is + + /sys/kernel/mm/hugepages + +For each hugepage size supported by the running kernel, a subdirectory +will exist, of the form + + hugepages-${size}kB + +Inside each of these directories, the same set of files will exist: + + nr_hugepages + nr_overcommit_hugepages + free_hugepages + resv_hugepages + surplus_hugepages + +which function as described above for the default hugepage-sized case. + If the user applications are going to request hugepages using mmap system call, then it is required that system administrator mount a file system of type hugetlbfs: diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ba9263e631b9..58c0de32e7f0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, #ifdef CONFIG_HUGETLB_PAGE +#define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { int hugetlb_next_nid; @@ -179,6 +180,7 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; + char name[HSTATE_NAME_LEN]; }; void __init hugetlb_add_hstate(unsigned order); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4cf7a90e9140..bb49ce5d0067 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -942,72 +943,6 @@ static void __init report_hugepages(void) } } -static int __init hugetlb_init(void) -{ - BUILD_BUG_ON(HPAGE_SHIFT == 0); - - if (!size_to_hstate(HPAGE_SIZE)) { - hugetlb_add_hstate(HUGETLB_PAGE_ORDER); - parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; - } - default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; - - hugetlb_init_hstates(); - - report_hugepages(); - - return 0; -} -module_init(hugetlb_init); - -/* Should be called on processing a hugepagesz=... option */ -void __init hugetlb_add_hstate(unsigned order) -{ - struct hstate *h; - if (size_to_hstate(PAGE_SIZE << order)) { - printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); - return; - } - BUG_ON(max_hstate >= HUGE_MAX_HSTATE); - BUG_ON(order == 0); - h = &hstates[max_hstate++]; - h->order = order; - h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); - hugetlb_init_one_hstate(h); - parsed_hstate = h; -} - -static int __init hugetlb_setup(char *s) -{ - unsigned long *mhp; - - /* - * !max_hstate means we haven't parsed a hugepagesz= parameter yet, - * so this hugepages= parameter goes to the "default hstate". - */ - if (!max_hstate) - mhp = &default_hstate_max_huge_pages; - else - mhp = &parsed_hstate->max_huge_pages; - - if (sscanf(s, "%lu", mhp) <= 0) - *mhp = 0; - - return 1; -} -__setup("hugepages=", hugetlb_setup); - -static unsigned int cpuset_mems_nr(unsigned int *array) -{ - int node; - unsigned int nr = 0; - - for_each_node_mask(node, cpuset_current_mems_allowed) - nr += array[node]; - - return nr; -} - #ifdef CONFIG_SYSCTL #ifdef CONFIG_HIGHMEM static void try_to_free_low(struct hstate *h, unsigned long count) @@ -1105,6 +1040,227 @@ out: return ret; } +#define HSTATE_ATTR_RO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +#define HSTATE_ATTR(_name) \ + static struct kobj_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +static struct kobject *hugepages_kobj; +static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; + +static struct hstate *kobj_to_hstate(struct kobject *kobj) +{ + int i; + for (i = 0; i < HUGE_MAX_HSTATE; i++) + if (hstate_kobjs[i] == kobj) + return &hstates[i]; + BUG(); + return NULL; +} + +static ssize_t nr_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj); + return sprintf(buf, "%lu\n", h->nr_huge_pages); +} +static ssize_t nr_hugepages_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long input; + struct hstate *h = kobj_to_hstate(kobj); + + err = strict_strtoul(buf, 10, &input); + if (err) + return 0; + + h->max_huge_pages = set_max_huge_pages(h, input); + + return count; +} +HSTATE_ATTR(nr_hugepages); + +static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj); + return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages); +} +static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long input; + struct hstate *h = kobj_to_hstate(kobj); + + err = strict_strtoul(buf, 10, &input); + if (err) + return 0; + + spin_lock(&hugetlb_lock); + h->nr_overcommit_huge_pages = input; + spin_unlock(&hugetlb_lock); + + return count; +} +HSTATE_ATTR(nr_overcommit_hugepages); + +static ssize_t free_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj); + return sprintf(buf, "%lu\n", h->free_huge_pages); +} +HSTATE_ATTR_RO(free_hugepages); + +static ssize_t resv_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj); + return sprintf(buf, "%lu\n", h->resv_huge_pages); +} +HSTATE_ATTR_RO(resv_hugepages); + +static ssize_t surplus_hugepages_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h = kobj_to_hstate(kobj); + return sprintf(buf, "%lu\n", h->surplus_huge_pages); +} +HSTATE_ATTR_RO(surplus_hugepages); + +static struct attribute *hstate_attrs[] = { + &nr_hugepages_attr.attr, + &nr_overcommit_hugepages_attr.attr, + &free_hugepages_attr.attr, + &resv_hugepages_attr.attr, + &surplus_hugepages_attr.attr, + NULL, +}; + +static struct attribute_group hstate_attr_group = { + .attrs = hstate_attrs, +}; + +static int __init hugetlb_sysfs_add_hstate(struct hstate *h) +{ + int retval; + + hstate_kobjs[h - hstates] = kobject_create_and_add(h->name, + hugepages_kobj); + if (!hstate_kobjs[h - hstates]) + return -ENOMEM; + + retval = sysfs_create_group(hstate_kobjs[h - hstates], + &hstate_attr_group); + if (retval) + kobject_put(hstate_kobjs[h - hstates]); + + return retval; +} + +static void __init hugetlb_sysfs_init(void) +{ + struct hstate *h; + int err; + + hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); + if (!hugepages_kobj) + return; + + for_each_hstate(h) { + err = hugetlb_sysfs_add_hstate(h); + if (err) + printk(KERN_ERR "Hugetlb: Unable to add hstate %s", + h->name); + } +} + +static void __exit hugetlb_exit(void) +{ + struct hstate *h; + + for_each_hstate(h) { + kobject_put(hstate_kobjs[h - hstates]); + } + + kobject_put(hugepages_kobj); +} +module_exit(hugetlb_exit); + +static int __init hugetlb_init(void) +{ + BUILD_BUG_ON(HPAGE_SHIFT == 0); + + if (!size_to_hstate(HPAGE_SIZE)) { + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); + parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; + } + default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; + + hugetlb_init_hstates(); + + report_hugepages(); + + hugetlb_sysfs_init(); + + return 0; +} +module_init(hugetlb_init); + +/* Should be called on processing a hugepagesz=... option */ +void __init hugetlb_add_hstate(unsigned order) +{ + struct hstate *h; + if (size_to_hstate(PAGE_SIZE << order)) { + printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); + return; + } + BUG_ON(max_hstate >= HUGE_MAX_HSTATE); + BUG_ON(order == 0); + h = &hstates[max_hstate++]; + h->order = order; + h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); + snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", + huge_page_size(h)/1024); + hugetlb_init_one_hstate(h); + parsed_hstate = h; +} + +static int __init hugetlb_setup(char *s) +{ + unsigned long *mhp; + + /* + * !max_hstate means we haven't parsed a hugepagesz= parameter yet, + * so this hugepages= parameter goes to the "default hstate". + */ + if (!max_hstate) + mhp = &default_hstate_max_huge_pages; + else + mhp = &parsed_hstate->max_huge_pages; + + if (sscanf(s, "%lu", mhp) <= 0) + *mhp = 0; + + return 1; +} +__setup("hugepages=", hugetlb_setup); + +static unsigned int cpuset_mems_nr(unsigned int *array) +{ + int node; + unsigned int nr = 0; + + for_each_node_mask(node, cpuset_current_mems_allowed) + nr += array[node]; + + return nr; +} + int hugetlb_sysctl_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) -- cgit v1.2.3 From b4718e628dbf68a2dee23b5709e2aa3190409c56 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 23 Jul 2008 21:27:51 -0700 Subject: x86: add hugepagesz option on 64-bit Add an hugepagesz=... option similar to IA64, PPC etc. to x86-64. This finally allows to select GB pages for hugetlbfs in x86 now that all the infrastructure is in place. Signed-off-by: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 11 +++++++++-- arch/x86/mm/hugetlbpage.c | 17 +++++++++++++++++ include/asm-x86/page.h | 2 ++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5e20ccb5a736..d55fd88fd0a9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -774,8 +774,15 @@ and is between 256 and 4096 characters. It is defined in the file hisax= [HW,ISDN] See Documentation/isdn/README.HiSax. - hugepages= [HW,X86-32,IA-64] Maximal number of HugeTLB pages. - hugepagesz= [HW,IA-64,PPC] The size of the HugeTLB pages. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. + hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. + On x86 this option can be specified multiple times + interleaved with hugepages= to reserve huge pages + of different sizes. Valid pages sizes on x86-64 + are 2M (when the CPU supports "pse") and 1G (when the + CPU supports the "pdpe1gb" cpuinfo flag) + Note that 1GB pages can only be allocated at boot time + using hugepages= and not freed afterwards. i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index b7a65a07af03..8f307d914c2e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -425,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ +#ifdef CONFIG_X86_64 +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE && cpu_has_gbpages) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", + ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); +#endif diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 6c846228948d..6e02098b1605 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -32,6 +32,8 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 2 + /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) -- cgit v1.2.3 From e11bfbfcb08ef4223b863799897c19cdf7c5bc00 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 23 Jul 2008 21:27:52 -0700 Subject: hugetlb: override default huge page size Allow configurations with the default huge page size which is different to the traditional HPAGE_SIZE size. The default huge page size is the one represented in the legacy /proc ABIs, SHM, and which is defaulted to when mounting hugetlbfs filesystems. This is implemented with a new kernel option default_hugepagesz=, which defaults to HPAGE_SIZE if not specified. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 7 +++++++ mm/hugetlb.c | 23 +++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d55fd88fd0a9..30278e9e5211 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -783,6 +783,13 @@ and is between 256 and 4096 characters. It is defined in the file CPU supports the "pdpe1gb" cpuinfo flag) Note that 1GB pages can only be allocated at boot time using hugepages= and not freed afterwards. + default_hugepagesz= + [same as hugepagesz=] The size of the default + HugeTLB page size. This is the size represented by + the legacy /proc/ hugepages APIs, used for SHM, and + default size when mounting hugetlbfs filesystems. + Defaults to the default architecture's huge page size + if not specified. i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 107c1ce223cb..2a2f6e869401 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -34,6 +34,7 @@ struct hstate hstates[HUGE_MAX_HSTATE]; /* for command line parsing */ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; +static unsigned long __initdata default_hstate_size; #define for_each_hstate(h) \ for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++) @@ -1288,11 +1289,14 @@ static int __init hugetlb_init(void) { BUILD_BUG_ON(HPAGE_SHIFT == 0); - if (!size_to_hstate(HPAGE_SIZE)) { - hugetlb_add_hstate(HUGETLB_PAGE_ORDER); - parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; + if (!size_to_hstate(default_hstate_size)) { + default_hstate_size = HPAGE_SIZE; + if (!size_to_hstate(default_hstate_size)) + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); } - default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; + default_hstate_idx = size_to_hstate(default_hstate_size) - hstates; + if (default_hstate_max_huge_pages) + default_hstate.max_huge_pages = default_hstate_max_huge_pages; hugetlb_init_hstates(); @@ -1332,7 +1336,7 @@ void __init hugetlb_add_hstate(unsigned order) parsed_hstate = h; } -static int __init hugetlb_setup(char *s) +static int __init hugetlb_nrpages_setup(char *s) { unsigned long *mhp; static unsigned long *last_mhp; @@ -1367,7 +1371,14 @@ static int __init hugetlb_setup(char *s) return 1; } -__setup("hugepages=", hugetlb_setup); +__setup("hugepages=", hugetlb_nrpages_setup); + +static int __init hugetlb_default_setup(char *s) +{ + default_hstate_size = memparse(s, &s); + return 1; +} +__setup("default_hugepagesz=", hugetlb_default_setup); static unsigned int cpuset_mems_nr(unsigned int *array) { -- cgit v1.2.3 From 0d9ea75443dc7e37843e656b8ebc947a6d16d618 Mon Sep 17 00:00:00 2001 From: Jon Tollefson Date: Wed, 23 Jul 2008 21:27:56 -0700 Subject: powerpc: support multiple hugepage sizes Instead of using the variable mmu_huge_psize to keep track of the huge page size we use an array of MMU_PAGE_* values. For each supported huge page size we need to know the hugepte_shift value and have a pgtable_cache. The hstate or an mmu_huge_psizes index is passed to functions so that they know which huge page size they should use. The hugepage sizes 16M and 64K are setup(if available on the hardware) so that they don't have to be set on the boot cmd line in order to use them. The number of 16G pages have to be specified at boot-time though (e.g. hugepagesz=16G hugepages=5). Signed-off-by: Jon Tollefson Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 10 +- arch/powerpc/mm/hash_utils_64.c | 9 +- arch/powerpc/mm/hugetlbpage.c | 274 +++++++++++++++++++++++------------- arch/powerpc/mm/init_64.c | 8 +- arch/powerpc/mm/tlb_64.c | 2 +- include/asm-powerpc/hugetlb.h | 5 +- include/asm-powerpc/mmu-hash64.h | 4 +- include/asm-powerpc/page_64.h | 1 + include/asm-powerpc/pgalloc-64.h | 4 +- 9 files changed, 199 insertions(+), 118 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 30278e9e5211..01a2992b5754 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -776,11 +776,11 @@ and is between 256 and 4096 characters. It is defined in the file hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. - On x86 this option can be specified multiple times - interleaved with hugepages= to reserve huge pages - of different sizes. Valid pages sizes on x86-64 - are 2M (when the CPU supports "pse") and 1G (when the - CPU supports the "pdpe1gb" cpuinfo flag) + On x86-64 and powerpc, this option can be specified + multiple times interleaved with hugepages= to reserve + huge pages of different sizes. Valid pages sizes on + x86-64 are 2M (when the CPU supports "pse") and 1G + (when the CPU supports the "pdpe1gb" cpuinfo flag) Note that 1GB pages can only be allocated at boot time using hugepages= and not freed afterwards. default_hugepagesz= diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ae4c717243a5..5ce5a4dcd008 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -103,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE -int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif #ifdef CONFIG_PPC_64K_PAGES @@ -460,15 +459,15 @@ static void __init htab_init_page_sizes(void) /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); -/* Init large page size. Currently, we pick 16M or 1M depending +/* Set default large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - set_huge_psize(MMU_PAGE_16M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - set_huge_psize(MMU_PAGE_1M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; #endif /* CONFIG_HUGETLB_PAGE */ } @@ -889,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (HPAGE_SHIFT && psize == mmu_huge_psize) { + if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 19b1a9cec6d5..fb42c4dd3217 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -37,15 +37,30 @@ static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; -unsigned int hugepte_shift; -#define PTRS_PER_HUGEPTE (1 << hugepte_shift) -#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) +/* Array of valid huge page sizes - non-zero value(hugepte_shift) is + * stored for the huge page sizes that are valid. + */ +unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ + +#define hugepte_shift mmu_huge_psizes +#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) +#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) + +#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ + + hugepte_shift[psize]) +#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) +#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) -#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) -#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) -#define HUGEPD_MASK (~(HUGEPD_SIZE-1)) +/* Subtract one from array size because we don't need a cache for 4K since + * is not a huge page size */ +#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ + + psize-1]) +#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) -#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) +static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { + "unused_4K", "hugepte_cache_64K", "unused_64K_AP", + "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" +}; /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() * will choke on pointers to hugepte tables, which is handy for @@ -56,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t; #define hugepd_none(hpd) ((hpd).pd == 0) +static inline int shift_to_mmu_psize(unsigned int shift) +{ + switch (shift) { +#ifndef CONFIG_PPC_64K_PAGES + case PAGE_SHIFT_64K: + return MMU_PAGE_64K; +#endif + case PAGE_SHIFT_16M: + return MMU_PAGE_16M; + case PAGE_SHIFT_16G: + return MMU_PAGE_16G; + } + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!(hpd.pd & HUGEPD_OK)); return (pte_t *)(hpd.pd & ~HUGEPD_OK); } -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + struct hstate *hstate) { - unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); + unsigned int shift = huge_page_shift(hstate); + int psize = shift_to_mmu_psize(shift); + unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); pte_t *dir = hugepd_page(*hpdp); return dir + idx; } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, - unsigned long address) + unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache, + pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), GFP_KERNEL|__GFP_REPEAT); if (! new) @@ -81,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, spin_lock(&mm->page_table_lock); if (!hugepd_none(*hpdp)) - kmem_cache_free(huge_pgtable_cache, new); + kmem_cache_free(huge_pgtable_cache(psize), new); else hpdp->pd = (unsigned long)new | HUGEPD_OK; spin_unlock(&mm->page_table_lock); @@ -90,21 +130,22 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, /* Base page size affects how we walk hugetlb page tables */ #ifdef CONFIG_PPC_64K_PAGES -#define hpmd_offset(pud, addr) pmd_offset(pud, addr) -#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) +#define hpmd_offset(pud, addr, h) pmd_offset(pud, addr) +#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr) #else static inline -pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) +pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) { - if (HPAGE_SHIFT == PAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_offset(pud, addr); else return (pmd_t *) pud; } static inline -pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, + struct hstate *hstate) { - if (HPAGE_SHIFT == PAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_alloc(mm, pud, addr); else return (pmd_t *) pud; @@ -128,8 +169,9 @@ void add_gpage(unsigned long addr, unsigned long page_size, } /* Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. */ -int alloc_bootmem_huge_page(struct hstate *h) + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -137,7 +179,7 @@ int alloc_bootmem_huge_page(struct hstate *h) m = phys_to_virt(gpage_freearray[--nr_gpages]); gpage_freearray[nr_gpages] = 0; list_add(&m->list, &huge_boot_pages); - m->hstate = h; + m->hstate = hstate; return 1; } @@ -149,17 +191,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pud_t *pu; pmd_t *pm; - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + unsigned int psize; + unsigned int shift; + unsigned long sz; + struct hstate *hstate; + psize = get_slice_psize(mm, addr); + shift = mmu_psize_to_shift(psize); + sz = ((1UL) << shift); + hstate = size_to_hstate(sz); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); if (!pgd_none(*pg)) { pu = pud_offset(pg, addr); if (!pud_none(*pu)) { - pm = hpmd_offset(pu, addr); + pm = hpmd_offset(pu, addr, hstate); if (!pmd_none(*pm)) - return hugepte_offset((hugepd_t *)pm, addr); + return hugepte_offset((hugepd_t *)pm, addr, + hstate); } } @@ -173,16 +223,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; + struct hstate *hstate; + unsigned int psize; + hstate = size_to_hstate(sz); - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + psize = get_slice_psize(mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); pu = pud_alloc(mm, pg, addr); if (pu) { - pm = hpmd_alloc(mm, pu, addr); + pm = hpmd_alloc(mm, pu, addr, hstate); if (pm) hpdp = (hugepd_t *)pm; } @@ -190,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (! hpdp) return NULL; - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) return NULL; - return hugepte_offset(hpdp, addr); + return hugepte_offset(hpdp, addr, hstate); } int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) @@ -201,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) +static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, + unsigned int psize) { pte_t *hugepte = hugepd_page(*hpdp); hpdp->pd = 0; tlb->need_flush = 1; - pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, + pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, + HUGEPTE_CACHE_NUM+psize-1, PGF_CACHENUM_MASK)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) + unsigned long floor, unsigned long ceiling, + unsigned int psize) { pmd_t *pmd; unsigned long next; @@ -225,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) continue; - free_hugepte_range(tlb, (hugepd_t *)pmd); + free_hugepte_range(tlb, (hugepd_t *)pmd, psize); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; @@ -251,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud_t *pud; unsigned long next; unsigned long start; + unsigned int shift; + unsigned int psize = get_slice_psize(tlb->mm, addr); + shift = mmu_psize_to_shift(psize); start = addr; pud = pud_offset(pgd, addr); @@ -259,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, #ifdef CONFIG_PPC_64K_PAGES if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling, + psize); #else - if (HPAGE_SHIFT == PAGE_SHIFT_64K) { + if (shift == PAGE_SHIFT_64K) { if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling, psize); } else { if (pud_none(*pud)) continue; - free_hugepte_range(tlb, (hugepd_t *)pud); + free_hugepte_range(tlb, (hugepd_t *)pud, psize); } #endif } while (pud++, addr = next, addr != end); @@ -336,27 +398,29 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ + unsigned int psize = get_slice_psize(tlb->mm, addr); - addr &= HUGEPD_MASK; + addr &= HUGEPD_MASK(psize); if (addr < floor) { - addr += HUGEPD_SIZE; + addr += HUGEPD_SIZE(psize); if (!addr) return; } if (ceiling) { - ceiling &= HUGEPD_MASK; + ceiling &= HUGEPD_MASK(psize); if (!ceiling) return; } if (end - 1 > ceiling - 1) - end -= HUGEPD_SIZE; + end -= HUGEPD_SIZE(psize); if (addr > end - 1) return; start = addr; pgd = pgd_offset(tlb->mm, addr); do { - BUG_ON(get_slice_psize(tlb->mm, addr) != mmu_huge_psize); + psize = get_slice_psize(tlb->mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -373,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, * necessary anymore if we make hpte_need_flush() get the * page size from the slices */ - pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); + unsigned int psize = get_slice_psize(mm, addr); + unsigned int shift = mmu_psize_to_shift(psize); + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); } *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } @@ -390,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; + unsigned int mmu_psize = get_slice_psize(mm, address); - if (get_slice_psize(mm, address) != mmu_huge_psize) + /* Verify it is a huge page else bail. */ + if (!mmu_huge_psizes[mmu_psize]) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); page = pte_page(*ptep); - if (page) - page += (address % HPAGE_SIZE) / PAGE_SIZE; + if (page) { + unsigned int shift = mmu_psize_to_shift(mmu_psize); + unsigned long sz = ((1UL) << shift); + page += (address % sz) / PAGE_SIZE; + } return page; } @@ -425,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return slice_get_unmapped_area(addr, len, flags, - mmu_huge_psize, 1, 0); + struct hstate *hstate = hstate_file(file); + int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } /* * Called by asm hashtable.S for doing lazy icache flush */ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, - pte_t pte, int trap) + pte_t pte, int trap, unsigned long sz) { struct page *page; int i; @@ -446,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, /* page is dirty */ if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { - for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + for (i = 0; i < (sz / PAGE_SIZE); i++) __flush_dcache_icache(page_address(page+i)); set_bit(PG_arch_1, &page->flags); } else { @@ -462,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, { pte_t *ptep; unsigned long old_pte, new_pte; - unsigned long va, rflags, pa; + unsigned long va, rflags, pa, sz; long slot; int err = 1; int ssize = user_segment_size(ea); + unsigned int mmu_psize; + int shift; + mmu_psize = get_slice_psize(mm, ea); + if (!mmu_huge_psizes[mmu_psize]) + goto out; ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ @@ -509,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + shift = mmu_psize_to_shift(mmu_psize); + sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) /* No CPU has hugepages but lacks no execute, so we * don't need to worry about that case */ rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), - trap); + trap, sz); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, HPAGE_SHIFT, ssize); + hash = hpt_hash(va, shift, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); + unsigned long hash = hpt_hash(va, shift, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; @@ -553,7 +634,7 @@ repeat: /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, - mmu_huge_psize, ssize); + mmu_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -561,7 +642,7 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - mmu_huge_psize, ssize); + mmu_psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -598,66 +679,50 @@ void set_huge_psize(int psize) (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { - /* Return if huge page size is the same as the - * base page size. */ - if (mmu_psize_defs[psize].shift == PAGE_SHIFT) + /* Return if huge page size has already been setup or is the + * same as the base page size. */ + if (mmu_huge_psizes[psize] || + mmu_psize_defs[psize].shift == PAGE_SHIFT) return; + hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); - HPAGE_SHIFT = mmu_psize_defs[psize].shift; - mmu_huge_psize = psize; - - switch (HPAGE_SHIFT) { + switch (mmu_psize_defs[psize].shift) { case PAGE_SHIFT_64K: /* We only allow 64k hpages with 4k base page, * which was checked above, and always put them * at the PMD */ - hugepte_shift = PMD_SHIFT; + hugepte_shift[psize] = PMD_SHIFT; break; case PAGE_SHIFT_16M: /* 16M pages can be at two different levels * of pagestables based on base page size */ if (PAGE_SHIFT == PAGE_SHIFT_64K) - hugepte_shift = PMD_SHIFT; + hugepte_shift[psize] = PMD_SHIFT; else /* 4k base page */ - hugepte_shift = PUD_SHIFT; + hugepte_shift[psize] = PUD_SHIFT; break; case PAGE_SHIFT_16G: /* 16G pages are always at PGD level */ - hugepte_shift = PGDIR_SHIFT; + hugepte_shift[psize] = PGDIR_SHIFT; break; } - hugepte_shift -= HPAGE_SHIFT; + hugepte_shift[psize] -= mmu_psize_defs[psize].shift; } else - HPAGE_SHIFT = 0; + hugepte_shift[psize] = 0; } static int __init hugepage_setup_sz(char *str) { unsigned long long size; - int mmu_psize = -1; + int mmu_psize; int shift; size = memparse(str, &str); shift = __ffs(size); - switch (shift) { -#ifndef CONFIG_PPC_64K_PAGES - case PAGE_SHIFT_64K: - mmu_psize = MMU_PAGE_64K; - break; -#endif - case PAGE_SHIFT_16M: - mmu_psize = MMU_PAGE_16M; - break; - case PAGE_SHIFT_16G: - mmu_psize = MMU_PAGE_16G; - break; - } - - if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) { + mmu_psize = shift_to_mmu_psize(shift); + if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) set_huge_psize(mmu_psize); - hugetlb_add_hstate(shift - PAGE_SHIFT); - } else printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); @@ -672,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr) static int __init hugetlbpage_init(void) { + unsigned int psize; + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) return -ENODEV; - - huge_pgtable_cache = kmem_cache_create("hugepte_cache", - HUGEPTE_TABLE_SIZE, - HUGEPTE_TABLE_SIZE, - 0, - zero_ctor); - if (! huge_pgtable_cache) - panic("hugetlbpage_init(): could not create hugepte cache\n"); + /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE + * and adjust PTE_NONCACHE_NUM if the number of supported huge page + * sizes changes. + */ + set_huge_psize(MMU_PAGE_16M); + set_huge_psize(MMU_PAGE_64K); + set_huge_psize(MMU_PAGE_16G); + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + if (mmu_huge_psizes[psize]) { + huge_pgtable_cache(psize) = kmem_cache_create( + HUGEPTE_CACHE_NAME(psize), + HUGEPTE_TABLE_SIZE(psize), + HUGEPTE_TABLE_SIZE(psize), + 0, + zero_ctor); + if (!huge_pgtable_cache(psize)) + panic("hugetlbpage_init(): could not create %s"\ + "\n", HUGEPTE_CACHE_NAME(psize)); + } + } return 0; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6ef63caca682..a41bc5aa2043 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { }; #ifdef CONFIG_HUGETLB_PAGE -/* Hugepages need one extra cache, initialized in hugetlbpage.c. We - * can't put into the tables above, because HPAGE_SHIFT is not compile - * time constant. */ -struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +/* Hugepages need an extra cache per hugepagesize, initialized in + * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT + * is not compile time constant. */ +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; #else struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index a01b5c608ff9..409fcc7b63ce 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE - psize = mmu_huge_psize; + psize = get_slice_psize(mm, addr);; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ diff --git a/include/asm-powerpc/hugetlb.h b/include/asm-powerpc/hugetlb.h index ca37c4af27b1..26f0d0ab27a5 100644 --- a/include/asm-powerpc/hugetlb.h +++ b/include/asm-powerpc/hugetlb.h @@ -24,9 +24,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h index b61181aa7746..19c7a9403490 100644 --- a/include/asm-powerpc/mmu-hash64.h +++ b/include/asm-powerpc/mmu-hash64.h @@ -194,9 +194,9 @@ extern int mmu_ci_restrictions; #ifdef CONFIG_HUGETLB_PAGE /* - * The page size index of the huge pages for use by hugetlbfs + * The page size indexes of the huge pages for use by hugetlbfs */ -extern int mmu_huge_psize; +extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT]; #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h index 02fd80710e9d..043bfdfe4f73 100644 --- a/include/asm-powerpc/page_64.h +++ b/include/asm-powerpc/page_64.h @@ -90,6 +90,7 @@ extern unsigned int HPAGE_SHIFT; #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 3 #endif /* __ASSEMBLY__ */ diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h index 68980990f62a..812a1d8f35cb 100644 --- a/include/asm-powerpc/pgalloc-64.h +++ b/include/asm-powerpc/pgalloc-64.h @@ -22,7 +22,7 @@ extern struct kmem_cache *pgtable_cache[]; #define PUD_CACHE_NUM 1 #define PMD_CACHE_NUM 1 #define HUGEPTE_CACHE_NUM 2 -#define PTE_NONCACHE_NUM 3 /* from GFP rather than kmem_cache */ +#define PTE_NONCACHE_NUM 7 /* from GFP rather than kmem_cache */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -119,7 +119,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) __free_page(ptepage); } -#define PGF_CACHENUM_MASK 0x3 +#define PGF_CACHENUM_MASK 0x7 typedef struct pgtable_free { unsigned long val; -- cgit v1.2.3 From 5c755e9fd813810680abd56ec09a5f90143e815b Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Wed, 23 Jul 2008 21:28:19 -0700 Subject: memory-hotplug: add sysfs removable attribute for hotplug memory remove Memory may be hot-removed on a per-memory-block basis, particularly on POWER where the SPARSEMEM section size often matches the memory-block size. A user-level agent must be able to identify which sections of memory are likely to be removable before attempting the potentially expensive operation. This patch adds a file called "removable" to the memory directory in sysfs to help such an agent. In this patch, a memory block is considered removable if; o It contains only MOVABLE pageblocks o It contains only pageblocks with free pages regardless of pageblock type On the other hand, a memory block starting with a PageReserved() page will never be considered removable. Without this patch, the user-agent is forced to choose a memory block to remove randomly. Sample output of the sysfs files: ./memory/memory0/removable: 0 ./memory/memory1/removable: 0 ./memory/memory2/removable: 0 ./memory/memory3/removable: 0 ./memory/memory4/removable: 0 ./memory/memory5/removable: 0 ./memory/memory6/removable: 0 ./memory/memory7/removable: 1 ./memory/memory8/removable: 0 ./memory/memory9/removable: 0 ./memory/memory10/removable: 0 ./memory/memory11/removable: 0 ./memory/memory12/removable: 0 ./memory/memory13/removable: 0 ./memory/memory14/removable: 0 ./memory/memory15/removable: 0 ./memory/memory16/removable: 0 ./memory/memory17/removable: 1 ./memory/memory18/removable: 1 ./memory/memory19/removable: 1 ./memory/memory20/removable: 1 ./memory/memory21/removable: 1 ./memory/memory22/removable: 1 Signed-off-by: Badari Pulavarty Signed-off-by: Mel Gorman Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-memory | 24 +++++++++++ drivers/base/memory.c | 19 ++++++++ include/linux/memory_hotplug.h | 12 ++++++ mm/memory_hotplug.c | 60 ++++++++++++++++++++++++++ 4 files changed, 115 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-devices-memory (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory new file mode 100644 index 000000000000..7a16fe1e2270 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -0,0 +1,24 @@ +What: /sys/devices/system/memory +Date: June 2008 +Contact: Badari Pulavarty +Description: + The /sys/devices/system/memory contains a snapshot of the + internal state of the kernel memory blocks. Files could be + added or removed dynamically to represent hot-add/remove + operations. + +Users: hotplug memory add/remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memory/memoryX/removable +Date: June 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/removable + indicates whether this memory block is removable or not. + This is useful for a user-level agent to determine + identify removable sections of the memory before attempting + potentially expensive hot-remove memory operation + +Users: hotplug memory remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 4d4e0e7b6e92..855ed1a9f97b 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -100,6 +100,21 @@ static ssize_t show_mem_phys_index(struct sys_device *dev, return sprintf(buf, "%08lx\n", mem->phys_index); } +/* + * Show whether the section of memory is likely to be hot-removable + */ +static ssize_t show_mem_removable(struct sys_device *dev, char *buf) +{ + unsigned long start_pfn; + int ret; + struct memory_block *mem = + container_of(dev, struct memory_block, sysdev); + + start_pfn = section_nr_to_pfn(mem->phys_index); + ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION); + return sprintf(buf, "%d\n", ret); +} + /* * online, offline, going offline, etc. */ @@ -262,6 +277,7 @@ static ssize_t show_phys_device(struct sys_device *dev, static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); +static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); #define mem_create_simple_file(mem, attr_name) \ sysdev_create_file(&mem->sysdev, &attr_##attr_name) @@ -350,6 +366,8 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, state); if (!ret) ret = mem_create_simple_file(mem, phys_device); + if (!ret) + ret = mem_create_simple_file(mem, removable); return ret; } @@ -394,6 +412,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); + mem_remove_simple_file(mem, removable); unregister_memory(mem, section); return 0; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3628e5088f64..763ba81fc0f0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -199,6 +199,18 @@ extern int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); +#ifdef CONFIG_MEMORY_HOTREMOVE + +extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); + +#else +static inline int is_mem_section_removable(unsigned long pfn, + unsigned long nr_pages) +{ + return 0; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 93aba78dc8b6..89fee2dcb039 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -522,6 +522,66 @@ error: EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE +/* + * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy + * set and the size of the free page is given by page_order(). Using this, + * the function determines if the pageblock contains only free pages. + * Due to buddy contraints, a free page at least the size of a pageblock will + * be located at the start of the pageblock + */ +static inline int pageblock_free(struct page *page) +{ + return PageBuddy(page) && page_order(page) >= pageblock_order; +} + +/* Return the start of the next active pageblock after a given page */ +static struct page *next_active_pageblock(struct page *page) +{ + int pageblocks_stride; + + /* Ensure the starting page is pageblock-aligned */ + BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); + + /* Move forward by at least 1 * pageblock_nr_pages */ + pageblocks_stride = 1; + + /* If the entire pageblock is free, move to the end of free page */ + if (pageblock_free(page)) + pageblocks_stride += page_order(page) - pageblock_order; + + return page + (pageblocks_stride * pageblock_nr_pages); +} + +/* Checks if this range of memory is likely to be hot-removable. */ +int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) +{ + int type; + struct page *page = pfn_to_page(start_pfn); + struct page *end_page = page + nr_pages; + + /* Check the starting page of each pageblock within the range */ + for (; page < end_page; page = next_active_pageblock(page)) { + type = get_pageblock_migratetype(page); + + /* + * A pageblock containing MOVABLE or free pages is considered + * removable + */ + if (type != MIGRATE_MOVABLE && !pageblock_free(page)) + return 0; + + /* + * A pageblock starting with a PageReserved page is not + * considered removable. + */ + if (PageReserved(page)) + return 0; + } + + /* All pageblocks in the memory block are likely to be hot-removable */ + return 1; +} + /* * Confirm all pages in a range [start, end) is belongs to the same zone. */ -- cgit v1.2.3 From 77437fd4e61f87cc94d9314baa5cbf50e3ccdf54 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Wed, 23 Jul 2008 21:28:33 -0700 Subject: pm: boot time suspend selftest Boot-time test for system suspend states (STR or standby). The generic RTC framework triggers wakeup alarms, which are used to exit those states. - Measures some aspects of suspend time ... this uses "jiffies" until someone converts it to use a timebase that works properly even while timer IRQs are disabled. - Triggered by a command line parameter. By default nothing even vaguely troublesome will happen, but "test_suspend=mem" will give you a brief STR test during system boot. (Or you may need to use "test_suspend=standby" instead, if your hardware needs that.) This isn't without problems. It fires early enough during boot that for example both PCMCIA and MMC stacks have misbehaved. The workaround in those cases was to boot without such media cards inserted. [matthltc@us.ibm.com: fix compile failure in boot time suspend selftest] Signed-off-by: David Brownell Cc: Ingo Molnar Cc: Pavel Machek Cc: "Rafael J. Wysocki" Signed-off-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 9 +- kernel/power/Kconfig | 11 ++ kernel/power/main.c | 194 +++++++++++++++++++++++++++++++++++- 3 files changed, 212 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 01a2992b5754..4d705713cabc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -87,7 +87,8 @@ parameter is applicable: SH SuperH architecture is enabled. SMP The kernel is an SMP kernel. SPARC Sparc architecture is enabled. - SWSUSP Software suspend is enabled. + SWSUSP Software suspend (hibernation) is enabled. + SUSPEND System suspend states are enabled. TS Appropriate touchscreen support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. @@ -2123,6 +2124,12 @@ and is between 256 and 4096 characters. It is defined in the file tdfx= [HW,DRM] + test_suspend= [SUSPEND] + Specify "mem" (for Suspend-to-RAM) or "standby" (for + standby suspend) as the system sleep state to briefly + enter during system startup. The system is woken from + this state using a wakeup-capable RTC alarm. + thash_entries= [KNL,NET] Set number of hash buckets for TCP connection diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 59dfdf1e1d20..dcd165f92a88 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -94,6 +94,17 @@ config SUSPEND powered and thus its contents are preserved, such as the suspend-to-RAM state (e.g. the ACPI S3 state). +config PM_TEST_SUSPEND + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_LIB=y + ---help--- + This option will let you suspend your machine during bootup, and + make it wake up a few seconds later using an RTC wakeup alarm. + Enable this with a kernel parameter like "test_suspend=mem". + + You probably want to have your system's RTC driver statically + linked, ensuring that it's available when this test runs. + config SUSPEND_FREEZER bool "Enable freezer for suspend to RAM/standby" \ if ARCH_WANTS_FREEZER_CONTROL || BROKEN diff --git a/kernel/power/main.c b/kernel/power/main.c index 3398f4651aa1..95bff23ecdaa 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -132,6 +132,61 @@ static inline int suspend_test(int level) { return 0; } #ifdef CONFIG_SUSPEND +#ifdef CONFIG_PM_TEST_SUSPEND + +/* + * We test the system suspend code by setting an RTC wakealarm a short + * time in the future, then suspending. Suspending the devices won't + * normally take long ... some systems only need a few milliseconds. + * + * The time it takes is system-specific though, so when we test this + * during system bootup we allow a LOT of time. + */ +#define TEST_SUSPEND_SECONDS 5 + +static unsigned long suspend_test_start_time; + +static void suspend_test_start(void) +{ + /* FIXME Use better timebase than "jiffies", ideally a clocksource. + * What we want is a hardware counter that will work correctly even + * during the irqs-are-off stages of the suspend/resume cycle... + */ + suspend_test_start_time = jiffies; +} + +static void suspend_test_finish(const char *label) +{ + long nj = jiffies - suspend_test_start_time; + unsigned msec; + + msec = jiffies_to_msecs(abs(nj)); + pr_info("PM: %s took %d.%03d seconds\n", label, + msec / 1000, msec % 1000); + + /* Warning on suspend means the RTC alarm period needs to be + * larger -- the system was sooo slooowwww to suspend that the + * alarm (should have) fired before the system went to sleep! + * + * Warning on either suspend or resume also means the system + * has some performance issues. The stack dump of a WARN_ON + * is more likely to get the right attention than a printk... + */ + WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000)); +} + +#else + +static void suspend_test_start(void) +{ +} + +static void suspend_test_finish(const char *label) +{ +} + +#endif + /* This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) @@ -266,12 +321,13 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); + suspend_test_start(); error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); goto Recover_platform; } - + suspend_test_finish("suspend devices"); if (suspend_test(TEST_DEVICES)) goto Recover_platform; @@ -293,7 +349,9 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); Resume_devices: + suspend_test_start(); device_resume(PMSG_RESUME); + suspend_test_finish("resume devices"); resume_console(); Close: if (suspend_ops->end) @@ -521,3 +579,137 @@ static int __init pm_init(void) } core_initcall(pm_init); + + +#ifdef CONFIG_PM_TEST_SUSPEND + +#include + +/* + * To test system suspend, we need a hands-off mechanism to resume the + * system. RTCs wake alarms are a common self-contained mechanism. + */ + +static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) +{ + static char err_readtime[] __initdata = + KERN_ERR "PM: can't read %s time, err %d\n"; + static char err_wakealarm [] __initdata = + KERN_ERR "PM: can't set %s wakealarm, err %d\n"; + static char err_suspend[] __initdata = + KERN_ERR "PM: suspend test failed, error %d\n"; + static char info_test[] __initdata = + KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; + + unsigned long now; + struct rtc_wkalrm alm; + int status; + + /* this may fail if the RTC hasn't been initialized */ + status = rtc_read_time(rtc, &alm.time); + if (status < 0) { + printk(err_readtime, rtc->dev.bus_id, status); + return; + } + rtc_tm_to_time(&alm.time, &now); + + memset(&alm, 0, sizeof alm); + rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); + alm.enabled = true; + + status = rtc_set_alarm(rtc, &alm); + if (status < 0) { + printk(err_wakealarm, rtc->dev.bus_id, status); + return; + } + + if (state == PM_SUSPEND_MEM) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + if (status == -ENODEV) + state = PM_SUSPEND_STANDBY; + } + if (state == PM_SUSPEND_STANDBY) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + } + if (status < 0) + printk(err_suspend, status); +} + +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(char **)name_ptr = dev->bus_id; + return 1; +} + +/* + * Kernel options like "test_suspend=mem" force suspend/resume sanity tests + * at startup time. They're normally disabled, for faster boot and because + * we can't know which states really work on this particular system. + */ +static suspend_state_t test_state __initdata = PM_SUSPEND_ON; + +static char warn_bad_state[] __initdata = + KERN_WARNING "PM: can't test '%s' suspend state\n"; + +static int __init setup_test_suspend(char *value) +{ + unsigned i; + + /* "=mem" ==> "mem" */ + value++; + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (!pm_states[i]) + continue; + if (strcmp(pm_states[i], value) != 0) + continue; + test_state = (__force suspend_state_t) i; + return 0; + } + printk(warn_bad_state, value); + return 0; +} +__setup("test_suspend", setup_test_suspend); + +static int __init test_suspend(void) +{ + static char warn_no_rtc[] __initdata = + KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; + + char *pony = NULL; + struct rtc_device *rtc = NULL; + + /* PM is initialized by now; is that state testable? */ + if (test_state == PM_SUSPEND_ON) + goto done; + if (!valid_state(test_state)) { + printk(warn_bad_state, pm_states[test_state]); + goto done; + } + + /* RTCs have initialized by now too ... can we use one? */ + class_find_device(rtc_class, NULL, &pony, has_wakealarm); + if (pony) + rtc = rtc_class_open(pony); + if (!rtc) { + printk(warn_no_rtc); + goto done; + } + + /* go for it */ + test_wakealarm(rtc, test_state); + rtc_class_close(rtc); +done: + return 0; +} +late_initcall(test_suspend); + +#endif /* CONFIG_PM_TEST_SUSPEND */ -- cgit v1.2.3 From 40b4ac33b4d1bdd5cbeb2241be2399c550fa3696 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Jul 2008 21:28:36 -0700 Subject: pm: remove obsolete piece of PM documentation Remove some obsolete PM documentation. The majority of contents of Documentation/power/pm.txt are outdated. Remove the outdated parts of this file and move the rest to Documentation/power/apm-acpi.txt . Update the index in Documentation/power/ as appropriate. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/00-INDEX | 4 +- Documentation/power/apm-acpi.txt | 32 +++++ Documentation/power/pm.txt | 257 --------------------------------------- 3 files changed, 34 insertions(+), 259 deletions(-) create mode 100644 Documentation/power/apm-acpi.txt delete mode 100644 Documentation/power/pm.txt (limited to 'Documentation') diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX index a55d7f1c836d..fb742c213c9e 100644 --- a/Documentation/power/00-INDEX +++ b/Documentation/power/00-INDEX @@ -1,5 +1,7 @@ 00-INDEX - This file +apm-acpi.txt + - basic info about the APM and ACPI support. basic-pm-debugging.txt - Debugging suspend and resume devices.txt @@ -14,8 +16,6 @@ notifiers.txt - Registering suspend notifiers in device drivers pci.txt - How the PCI Subsystem Does Power Management -pm.txt - - info on Linux power management support. pm_qos_interface.txt - info on Linux PM Quality of Service interface power_supply_class.txt diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.txt new file mode 100644 index 000000000000..1bd799dc17e8 --- /dev/null +++ b/Documentation/power/apm-acpi.txt @@ -0,0 +1,32 @@ +APM or ACPI? +------------ +If you have a relatively recent x86 mobile, desktop, or server system, +odds are it supports either Advanced Power Management (APM) or +Advanced Configuration and Power Interface (ACPI). ACPI is the newer +of the two technologies and puts power management in the hands of the +operating system, allowing for more intelligent power management than +is possible with BIOS controlled APM. + +The best way to determine which, if either, your system supports is to +build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is +enabled by default). If a working ACPI implementation is found, the +ACPI driver will override and disable APM, otherwise the APM driver +will be used. + +No, sorry, you cannot have both ACPI and APM enabled and running at +once. Some people with broken ACPI or broken APM implementations +would like to use both to get a full set of working features, but you +simply cannot mix and match the two. Only one power management +interface can be in control of the machine at once. Think about it.. + +User-space Daemons +------------------ +Both APM and ACPI rely on user-space daemons, apmd and acpid +respectively, to be completely functional. Obtain both of these +daemons from your Linux distribution or from the Internet (see below) +and be sure that they are started sometime in the system boot process. +Go ahead and start both. If ACPI or APM is not available on your +system the associated daemon will exit gracefully. + + apmd: http://worldvisions.ca/~apenwarr/apmd/ + acpid: http://acpid.sf.net/ diff --git a/Documentation/power/pm.txt b/Documentation/power/pm.txt deleted file mode 100644 index be841507e43f..000000000000 --- a/Documentation/power/pm.txt +++ /dev/null @@ -1,257 +0,0 @@ - Linux Power Management Support - -This document briefly describes how to use power management with your -Linux system and how to add power management support to Linux drivers. - -APM or ACPI? ------------- -If you have a relatively recent x86 mobile, desktop, or server system, -odds are it supports either Advanced Power Management (APM) or -Advanced Configuration and Power Interface (ACPI). ACPI is the newer -of the two technologies and puts power management in the hands of the -operating system, allowing for more intelligent power management than -is possible with BIOS controlled APM. - -The best way to determine which, if either, your system supports is to -build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is -enabled by default). If a working ACPI implementation is found, the -ACPI driver will override and disable APM, otherwise the APM driver -will be used. - -No, sorry, you cannot have both ACPI and APM enabled and running at -once. Some people with broken ACPI or broken APM implementations -would like to use both to get a full set of working features, but you -simply cannot mix and match the two. Only one power management -interface can be in control of the machine at once. Think about it.. - -User-space Daemons ------------------- -Both APM and ACPI rely on user-space daemons, apmd and acpid -respectively, to be completely functional. Obtain both of these -daemons from your Linux distribution or from the Internet (see below) -and be sure that they are started sometime in the system boot process. -Go ahead and start both. If ACPI or APM is not available on your -system the associated daemon will exit gracefully. - - apmd: http://worldvisions.ca/~apenwarr/apmd/ - acpid: http://acpid.sf.net/ - -Driver Interface -- OBSOLETE, DO NOT USE! -----------------************************* - -Note: pm_register(), pm_access(), pm_dev_idle() and friends are -obsolete. Please do not use them. Instead you should properly hook -your driver into the driver model, and use its suspend()/resume() -callbacks to do this kind of stuff. - -If you are writing a new driver or maintaining an old driver, it -should include power management support. Without power management -support, a single driver may prevent a system with power management -capabilities from ever being able to suspend (safely). - -Overview: -1) Register each instance of a device with "pm_register" -2) Call "pm_access" before accessing the hardware. - (this will ensure that the hardware is awake and ready) -3) Your "pm_callback" is called before going into a - suspend state (ACPI D1-D3) or after resuming (ACPI D0) - from a suspend. -4) Call "pm_dev_idle" when the device is not being used - (optional but will improve device idle detection) -5) When unloaded, unregister the device with "pm_unregister" - -/* - * Description: Register a device with the power-management subsystem - * - * Parameters: - * type - device type (PCI device, system device, ...) - * id - instance number or unique identifier - * cback - request handler callback (suspend, resume, ...) - * - * Returns: Registered PM device or NULL on error - * - * Examples: - * dev = pm_register(PM_SYS_DEV, PM_SYS_VGA, vga_callback); - * - * struct pci_dev *pci_dev = pci_find_dev(...); - * dev = pm_register(PM_PCI_DEV, PM_PCI_ID(pci_dev), callback); - */ -struct pm_dev *pm_register(pm_dev_t type, unsigned long id, pm_callback cback); - -/* - * Description: Unregister a device with the power management subsystem - * - * Parameters: - * dev - PM device previously returned from pm_register - */ -void pm_unregister(struct pm_dev *dev); - -/* - * Description: Unregister all devices with a matching callback function - * - * Parameters: - * cback - previously registered request callback - * - * Notes: Provided for easier porting from old APM interface - */ -void pm_unregister_all(pm_callback cback); - -/* - * Power management request callback - * - * Parameters: - * dev - PM device previously returned from pm_register - * rqst - request type - * data - data, if any, associated with the request - * - * Returns: 0 if the request is successful - * EINVAL if the request is not supported - * EBUSY if the device is now busy and cannot handle the request - * ENOMEM if the device was unable to handle the request due to memory - * - * Details: The device request callback will be called before the - * device/system enters a suspend state (ACPI D1-D3) or - * or after the device/system resumes from suspend (ACPI D0). - * For PM_SUSPEND, the ACPI D-state being entered is passed - * as the "data" argument to the callback. The device - * driver should save (PM_SUSPEND) or restore (PM_RESUME) - * device context when the request callback is called. - * - * Once a driver returns 0 (success) from a suspend - * request, it should not process any further requests or - * access the device hardware until a call to "pm_access" is made. - */ -typedef int (*pm_callback)(struct pm_dev *dev, pm_request_t rqst, void *data); - -Driver Details --------------- -This is just a quick Q&A as a stopgap until a real driver writers' -power management guide is available. - -Q: When is a device suspended? - -Devices can be suspended based on direct user request (eg. laptop lid -closes), system power policy (eg. sleep after 30 minutes of console -inactivity), or device power policy (eg. power down device after 5 -minutes of inactivity) - -Q: Must a driver honor a suspend request? - -No, a driver can return -EBUSY from a suspend request and this -will stop the system from suspending. When a suspend request -fails, all suspended devices are resumed and the system continues -to run. Suspend can be retried at a later time. - -Q: Can the driver block suspend/resume requests? - -Yes, a driver can delay its return from a suspend or resume -request until the device is ready to handle requests. It -is advantageous to return as quickly as possible from a -request as suspend/resume are done serially. - -Q: What context is a suspend/resume initiated from? - -A suspend or resume is initiated from a kernel thread context. -It is safe to block, allocate memory, initiate requests -or anything else you can do within the kernel. - -Q: Will requests continue to arrive after a suspend? - -Possibly. It is the driver's responsibility to queue(*), -fail, or drop any requests that arrive after returning -success to a suspend request. It is important that the -driver not access its device until after it receives -a resume request as the device's bus may no longer -be active. - -(*) If a driver queues requests for processing after - resume be aware that the device, network, etc. - might be in a different state than at suspend time. - It's probably better to drop requests unless - the driver is a storage device. - -Q: Do I have to manage bus-specific power management registers - -No. It is the responsibility of the bus driver to manage -PCI, USB, etc. power management registers. The bus driver -or the power management subsystem will also enable any -wake-on functionality that the device has. - -Q: So, really, what do I need to do to support suspend/resume? - -You need to save any device context that would -be lost if the device was powered off and then restore -it at resume time. When ACPI is active, there are -three levels of device suspend states; D1, D2, and D3. -(The suspend state is passed as the "data" argument -to the device callback.) With D3, the device is powered -off and loses all context, D1 and D2 are shallower power -states and require less device context to be saved. To -play it safe, just save everything at suspend and restore -everything at resume. - -Q: Where do I store device context for suspend? - -Anywhere in memory, kmalloc a buffer or store it -in the device descriptor. You are guaranteed that the -contents of memory will be restored and accessible -before resume, even when the system suspends to disk. - -Q: What do I need to do for ACPI vs. APM vs. etc? - -Drivers need not be aware of the specific power management -technology that is active. They just need to be aware -of when the overlying power management system requests -that they suspend or resume. - -Q: What about device dependencies? - -When a driver registers a device, the power management -subsystem uses the information provided to build a -tree of device dependencies (eg. USB device X is on -USB controller Y which is on PCI bus Z) When power -management wants to suspend a device, it first sends -a suspend request to its driver, then the bus driver, -and so on up to the system bus. Device resumes -proceed in the opposite direction. - -Q: Who do I contact for additional information about - enabling power management for my specific driver/device? - -ACPI Development mailing list: linux-acpi@vger.kernel.org - -System Interface -- OBSOLETE, DO NOT USE! -----------------************************* -If you are providing new power management support to Linux (ie. -adding support for something like APM or ACPI), you should -communicate with drivers through the existing generic power -management interface. - -/* - * Send a request to all devices - * - * Parameters: - * rqst - request type - * data - data, if any, associated with the request - * - * Returns: 0 if the request is successful - * See "pm_callback" return for errors - * - * Details: Walk list of registered devices and call pm_send - * for each until complete or an error is encountered. - * If an error is encountered for a suspend request, - * return all devices to the state they were in before - * the suspend request. - */ -int pm_send_all(pm_request_t rqst, void *data); - -/* - * Find a matching device - * - * Parameters: - * type - device type (PCI device, system device, or 0 to match all devices) - * from - previous match or NULL to start from the beginning - * - * Returns: Matching device or NULL if none found - */ -struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from); -- cgit v1.2.3 From bdfe6b7c681669148dae4db27eb24ee5408ba371 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 23 Jul 2008 21:28:41 -0700 Subject: pm: acpi hibernation: utilize hardware signature ACPI defines a hardware signature. BIOS calculates the signature according to hardware configure and if hardware changes while hibernated, the signature will change. In that case, S4 resume should fail. Still, there may be systems on which this mechanism does not work correctly, so it is better to provide a workaround for them. For this reason, add a new switch to the acpi_sleep= command line argument allowing one to disable hardware signature checking. [shaohua.li@intel.com: build fix] Signed-off-by: Shaohua Li Signed-off-by: Rafael J. Wysocki Cc: Andi Kleen Cc: Len Brown Acked-by: Pavel Machek Cc: Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 4 +++- arch/x86/kernel/acpi/sleep.c | 4 ++++ drivers/acpi/sleep/main.c | 22 ++++++++++++++++++++++ include/linux/acpi.h | 1 + 4 files changed, 30 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4d705713cabc..497a98dafdaa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -148,10 +148,12 @@ and is between 256 and 4096 characters. It is defined in the file default: 0 acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep, old_ordering } + Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, old_ordering } See Documentation/power/video.txt for s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + s4_nohwsig prevents ACPI hardware signature from being + used during resume from hibernation. old_ordering causes the ACPI 1.0 ordering of the _PTS control method, wrt putting devices into low power states, to be enforced (the ACPI 2.0 ordering of _PTS is diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index a3ddad18aaa3..fa2161d5003b 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 2; if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; +#ifdef CONFIG_HIBERNATION + if (strncmp(str, "s4_nohwsig", 10) == 0) + acpi_no_s4_hw_signature(); +#endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c index 0489a7d1d42c..313507accf18 100644 --- a/drivers/acpi/sleep/main.c +++ b/drivers/acpi/sleep/main.c @@ -283,6 +283,15 @@ static struct platform_suspend_ops acpi_suspend_ops_old = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION +static unsigned long s4_hardware_signature; +static struct acpi_table_facs *facs; +static bool nosigcheck; + +void __init acpi_no_s4_hw_signature(void) +{ + nosigcheck = true; +} + static int acpi_hibernation_begin(void) { acpi_target_sleep_state = ACPI_STATE_S4; @@ -316,6 +325,12 @@ static void acpi_hibernation_leave(void) acpi_enable(); /* Reprogram control registers and execute _BFS */ acpi_leave_sleep_state_prep(ACPI_STATE_S4); + /* Check the hardware signature */ + if (facs && s4_hardware_signature != facs->hardware_signature) { + printk(KERN_EMERG "ACPI: Hardware changed while hibernated, " + "cannot resume!\n"); + panic("ACPI S4 hardware signature mismatch"); + } } static void acpi_pm_enable_gpes(void) @@ -544,6 +559,13 @@ int __init acpi_sleep_init(void) &acpi_hibernation_ops_old : &acpi_hibernation_ops); sleep_states[ACPI_STATE_S4] = 1; printk(" S4"); + if (!nosigcheck) { + acpi_get_table_by_index(ACPI_TABLE_INDEX_FACS, + (struct acpi_table_header **)&facs); + if (facs) + s4_hardware_signature = + facs->hardware_signature; + } } #endif status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a17177639376..702f79dad16a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -236,6 +236,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, const char *name); #ifdef CONFIG_PM_SLEEP +void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); #endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ -- cgit v1.2.3 From a90ed92ed852a3d4b8a6f20b10bba771997f5ede Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Wed, 23 Jul 2008 21:31:22 -0700 Subject: tridentfb: documentation update Make the tridentfb documentation closer to current state of the tridentfb driver. Fix also some formatting. Signed-off-by: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/fb/tridentfb.txt | 46 ++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/fb/tridentfb.txt b/Documentation/fb/tridentfb.txt index 8a6c8a43e6a3..45d9de5b13a3 100644 --- a/Documentation/fb/tridentfb.txt +++ b/Documentation/fb/tridentfb.txt @@ -3,11 +3,25 @@ Tridentfb is a framebuffer driver for some Trident chip based cards. The following list of chips is thought to be supported although not all are tested: -those from the Image series with Cyber in their names - accelerated -those with Blade in their names (Blade3D,CyberBlade...) - accelerated -the newer CyberBladeXP family - nonaccelerated - -Only PCI/AGP based cards are supported, none of the older Tridents. +those from the TGUI series 9440/96XX and with Cyber in their names +those from the Image series and with Cyber in their names +those with Blade in their names (Blade3D,CyberBlade...) +the newer CyberBladeXP family + +All families are accelerated. Only PCI/AGP based cards are supported, +none of the older Tridents. +The driver supports 8, 16 and 32 bits per pixel depths. +The TGUI family requires a line length to be power of 2 if acceleration +is enabled. This means that range of possible resolutions and bpp is +limited comparing to the range if acceleration is disabled (see list +of parameters below). + +Known bugs: +1. The driver randomly locks up on 3DImage975 chip with acceleration + enabled. The same happens in X11 (Xorg). +2. The ramdac speeds require some more fine tuning. It is possible to + switch resolution which the chip does not support at some depths for + older chips. How to use it? ============== @@ -17,12 +31,11 @@ video=tridentfb The parameters for tridentfb are concatenated with a ':' as in this example. -video=tridentfb:800x600,bpp=16,noaccel +video=tridentfb:800x600-16@75,noaccel The second level parameters that tridentfb understands are: noaccel - turns off acceleration (when it doesn't work for your card) -accel - force text acceleration (for boards which by default are noacceled) fp - use flat panel related stuff crt - assume monitor is present instead of fp @@ -31,21 +44,24 @@ center - for flat panels and resolutions smaller than native size center the image, otherwise use stretch -memsize - integer value in Kb, use if your card's memory size is misdetected. +memsize - integer value in KB, use if your card's memory size is misdetected. look at the driver output to see what it says when initializing. -memdiff - integer value in Kb,should be nonzero if your card reports - more memory than it actually has.For instance mine is 192K less than + +memdiff - integer value in KB, should be nonzero if your card reports + more memory than it actually has. For instance mine is 192K less than detection says in all three BIOS selectable situations 2M, 4M, 8M. Only use if your video memory is taken from main memory hence of - configurable size.Otherwise use memsize. - If in some modes which barely fit the memory you see garbage at the bottom - this might help by not letting change to that mode anymore. + configurable size. Otherwise use memsize. + If in some modes which barely fit the memory you see garbage + at the bottom this might help by not letting change to that mode + anymore. nativex - the width in pixels of the flat panel.If you know it (usually 1024 800 or 1280) and it is not what the driver seems to detect use it. -bpp - bits per pixel (8,16 or 32) -mode - a mode name like 800x600 (as described in Documentation/fb/modedb.txt) +bpp - bits per pixel (8,16 or 32) +mode - a mode name like 800x600-8@75 as described in + Documentation/fb/modedb.txt Using insane values for the above parameters will probably result in driver misbehaviour so take care(for instance memsize=12345678 or memdiff=23784 or -- cgit v1.2.3 From 4a25e41831ee851c1365d8b41decc22493b18e6d Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Wed, 23 Jul 2008 21:31:46 -0700 Subject: video: sh7760fb: SH7760/SH7763 LCDC framebuffer driver Framebuffer driver for the SH7760/SH7763 integrated LCD controller. Signed-off-by: Manuel Lauss Signed-off-by: Nobuhiro Iwamatsu Reviewed-by: Paul Mundt Cc: Krzysztof Helt Cc: "Antonino A. Daplas" Cc: Siegfried Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/fb/sh7760fb.txt | 131 +++++++++ drivers/video/Kconfig | 13 + drivers/video/Makefile | 1 + drivers/video/sh7760fb.c | 658 ++++++++++++++++++++++++++++++++++++++++++ include/asm-sh/sh7760fb.h | 197 +++++++++++++ 5 files changed, 1000 insertions(+) create mode 100644 Documentation/fb/sh7760fb.txt create mode 100644 drivers/video/sh7760fb.c create mode 100644 include/asm-sh/sh7760fb.h (limited to 'Documentation') diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt new file mode 100644 index 000000000000..c87bfe5c630a --- /dev/null +++ b/Documentation/fb/sh7760fb.txt @@ -0,0 +1,131 @@ +SH7760/SH7763 integrated LCDC Framebuffer driver +================================================ + +0. Overwiew +----------- +The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which +supports (in theory) resolutions ranging from 1x1 to 1024x1024, +with color depths ranging from 1 to 16 bits, on STN, DSTN and TFT Panels. + +Caveats: +* Framebuffer memory must be a large chunk allocated at the top + of Area3 (HW requirement). Because of this requirement you should NOT + make the driver a module since at runtime it may become impossible to + get a large enough contiguous chunk of memory. + +* The driver does not support changing resolution while loaded + (displays aren't hotpluggable anyway) + +* Heavy flickering may be observed + a) if you're using 15/16bit color modes at >= 640x480 px resolutions, + b) during PCMCIA (or any other slow bus) activity. + +* Rotation works only 90degress clockwise, and only if horizontal + resolution is <= 320 pixels. + +files: drivers/video/sh7760fb.c + include/asm-sh/sh7760fb.h + Documentation/fb/sh7760fb.txt + +1. Platform setup +----------------- +SH7760: + Video data is fetched via the DMABRG DMA engine, so you have to + configure the SH DMAC for DMABRG mode (write 0x94808080 to the + DMARSRA register somewhere at boot). + + PFC registers PCCR and PCDR must be set to peripheral mode. + (write zeros to both). + +The driver does NOT do the above for you since board setup is, well, job +of the board setup code. + +2. Panel definitions +-------------------- +The LCDC must explicitly be told about the type of LCD panel +attached. Data must be wrapped in a "struct sh7760fb_platdata" and +passed to the driver as platform_data. + +Suggest you take a closer look at the SH7760 Manual, Section 30. +(http://documentation.renesas.com/eng/products/mpumcu/e602291_sh7760.pdf) + +The following code illustrates what needs to be done to +get the framebuffer working on a 640x480 TFT: + +====================== cut here ====================================== + +#include +#include + +/* + * NEC NL6440bc26-01 640x480 TFT + * dotclock 25175 kHz + * Xres 640 Yres 480 + * Htotal 800 Vtotal 525 + * HsynStart 656 VsynStart 490 + * HsynLenn 30 VsynLenn 2 + * + * The linux framebuffer layer does not use the syncstart/synclen + * values but right/left/upper/lower margin values. The comments + * for the x_margin explain how to calculate those from given + * panel sync timings. + */ +static struct fb_videomode nl6448bc26 = { + .name = "NL6448BC26", + .refresh = 60, + .xres = 640, + .yres = 480, + .pixclock = 39683, /* in picoseconds! */ + .hsync_len = 30, + .vsync_len = 2, + .left_margin = 114, /* HTOT - (HSYNSLEN + HSYNSTART) */ + .right_margin = 16, /* HSYNSTART - XRES */ + .upper_margin = 33, /* VTOT - (VSYNLEN + VSYNSTART) */ + .lower_margin = 10, /* VSYNSTART - YRES */ + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED, + .flag = 0, +}; + +static struct sh7760fb_platdata sh7760fb_nl6448 = { + .def_mode = &nl6448bc26, + .ldmtr = LDMTR_TFT_COLOR_16, /* 16bit TFT panel */ + .lddfr = LDDFR_8BPP, /* we want 8bit output */ + .ldpmmr = 0x0070, + .ldpspr = 0x0500, + .ldaclnr = 0, + .ldickr = LDICKR_CLKSRC(LCDC_CLKSRC_EXTERNAL) | + LDICKR_CLKDIV(1), + .rotate = 0, + .novsync = 1, + .blank = NULL, +}; + +/* SH7760: + * 0xFE300800: 256 * 4byte xRGB palette ram + * 0xFE300C00: 42 bytes ctrl registers + */ +static struct resource sh7760_lcdc_res[] = { + [0] = { + .start = 0xFE300800, + .end = 0xFE300CFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 65, + .end = 65, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device sh7760_lcdc_dev = { + .dev = { + .platform_data = &sh7760fb_nl6448, + }, + .name = "sh7760-lcdc", + .id = -1, + .resource = sh7760_lcdc_res, + .num_resources = ARRAY_SIZE(sh7760_lcdc_res), +}; + +====================== cut here ====================================== diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index c1b7db843411..70d135e0cc47 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1991,6 +1991,19 @@ config FB_COBALT tristate "Cobalt server LCD frame buffer support" depends on FB && MIPS_COBALT +config FB_SH7760 + bool "SH7760/SH7763 LCDC support" + depends on FB && (CPU_SUBTYPE_SH7760 || CPU_SUBTYPE_SH7763) + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Support for the SH7760/SH7763 integrated (D)STN/TFT LCD Controller. + Supports display resolutions up to 1024x1024 pixel, grayscale and + color operation, with depths ranging from 1 bpp to 8 bpp monochrome + and 8, 15 or 16 bpp color; 90 degrees clockwise display rotation for + panels <= 320 pixel horizontal resolution. + config FB_VIRTUAL tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)" depends on FB diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 358032597757..0ebc1bfd2514 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_FB_PMAGB_B) += pmagb-b-fb.o obj-$(CONFIG_FB_MAXINE) += maxinefb.o obj-$(CONFIG_FB_METRONOME) += metronomefb.o obj-$(CONFIG_FB_S1D13XXX) += s1d13xxxfb.o +obj-$(CONFIG_FB_SH7760) += sh7760fb.o obj-$(CONFIG_FB_IMX) += imxfb.o obj-$(CONFIG_FB_S3C2410) += s3c2410fb.o obj-$(CONFIG_FB_FSL_DIU) += fsl-diu-fb.o diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c new file mode 100644 index 000000000000..4d0e28c5790b --- /dev/null +++ b/drivers/video/sh7760fb.c @@ -0,0 +1,658 @@ +/* + * SH7760/SH7763 LCDC Framebuffer driver. + * + * (c) 2006-2008 MSC Vertriebsges.m.b.H., + * Manuel Lauss + * (c) 2008 Nobuhiro Iwamatsu + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + * PLEASE HAVE A LOOK AT Documentation/fb/sh7760fb.txt! + * + * Thanks to Siegfried Schaefer + * for his original source and testing! + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct sh7760fb_par { + void __iomem *base; + int irq; + + struct sh7760fb_platdata *pd; /* display information */ + + dma_addr_t fbdma; /* physical address */ + + int rot; /* rotation enabled? */ + + u32 pseudo_palette[16]; + + struct platform_device *dev; + struct resource *ioarea; + struct completion vsync; /* vsync irq event */ +}; + +static irqreturn_t sh7760fb_irq(int irq, void *data) +{ + struct completion *c = data; + + complete(c); + + return IRQ_HANDLED; +} + +static void sh7760fb_wait_vsync(struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + + if (par->pd->novsync) + return; + + iowrite16(ioread16(par->base + LDINTR) & ~VINT_CHECK, + par->base + LDINTR); + + if (par->irq < 0) { + /* poll for vert. retrace: status bit is sticky */ + while (!(ioread16(par->base + LDINTR) & VINT_CHECK)) + cpu_relax(); + } else { + /* a "wait_for_irq_event(par->irq)" would be extremely nice */ + init_completion(&par->vsync); + enable_irq(par->irq); + wait_for_completion(&par->vsync); + disable_irq_nosync(par->irq); + } +} + +/* wait_for_lps - wait until power supply has reached a certain state. */ +static int wait_for_lps(struct sh7760fb_par *par, int val) +{ + int i = 100; + while (--i && ((ioread16(par->base + LDPMMR) & 3) != val)) + msleep(1); + + if (i <= 0) + return -ETIMEDOUT; + + return 0; +} + +/* en/disable the LCDC */ +static int sh7760fb_blank(int blank, struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + struct sh7760fb_platdata *pd = par->pd; + unsigned short cntr = ioread16(par->base + LDCNTR); + unsigned short intr = ioread16(par->base + LDINTR); + int lps; + + if (blank == FB_BLANK_UNBLANK) { + intr |= VINT_START; + cntr = LDCNTR_DON2 | LDCNTR_DON; + lps = 3; + } else { + intr &= ~VINT_START; + cntr = LDCNTR_DON2; + lps = 0; + } + + if (pd->blank) + pd->blank(blank); + + iowrite16(intr, par->base + LDINTR); + iowrite16(cntr, par->base + LDCNTR); + + return wait_for_lps(par, lps); +} + +/* set color registers */ +static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + u32 s = cmap->start; + u32 l = cmap->len; + u16 *r = cmap->red; + u16 *g = cmap->green; + u16 *b = cmap->blue; + u32 col, tmo; + int ret; + + ret = 0; + + sh7760fb_wait_vsync(info); + + /* request palette access */ + iowrite16(LDPALCR_PALEN, par->base + LDPALCR); + + /* poll for access grant */ + tmo = 100; + while (!(ioread16(par->base + LDPALCR) & LDPALCR_PALS) && (--tmo)) + cpu_relax(); + + if (!tmo) { + ret = 1; + dev_dbg(info->dev, "no palette access!\n"); + goto out; + } + + while (l && (s < 256)) { + col = ((*r) & 0xff) << 16; + col |= ((*g) & 0xff) << 8; + col |= ((*b) & 0xff); + col &= SH7760FB_PALETTE_MASK; + + if (s < 16) + ((u32 *) (info->pseudo_palette))[s] = s; + + s++; + l--; + r++; + g++; + b++; + } +out: + iowrite16(0, par->base + LDPALCR); + return ret; +} + +static void encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info, + unsigned long stride) +{ + memset(fix, 0, sizeof(struct fb_fix_screeninfo)); + strcpy(fix->id, "sh7760-lcdc"); + + fix->smem_start = (unsigned long)info->screen_base; + fix->smem_len = info->screen_size; + + fix->line_length = stride; +} + +static int sh7760fb_get_color_info(struct device *dev, + u16 lddfr, int *bpp, int *gray) +{ + int lbpp, lgray; + + lgray = lbpp = 0; + + switch (lddfr & LDDFR_COLOR_MASK) { + case LDDFR_1BPP_MONO: + lgray = 1; + lbpp = 1; + break; + case LDDFR_2BPP_MONO: + lgray = 1; + lbpp = 2; + break; + case LDDFR_4BPP_MONO: + lgray = 1; + case LDDFR_4BPP: + lbpp = 4; + break; + case LDDFR_6BPP_MONO: + lgray = 1; + case LDDFR_8BPP: + lbpp = 8; + break; + case LDDFR_16BPP_RGB555: + case LDDFR_16BPP_RGB565: + lbpp = 16; + lgray = 0; + break; + default: + dev_dbg(dev, "unsupported LDDFR bit depth.\n"); + return -EINVAL; + } + + if (bpp) + *bpp = lbpp; + if (gray) + *gray = lgray; + + return 0; +} + +static int sh7760fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct fb_fix_screeninfo *fix = &info->fix; + struct sh7760fb_par *par = info->par; + int ret, bpp; + + /* get color info from register value */ + ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, NULL); + if (ret) + return ret; + + var->bits_per_pixel = bpp; + + if ((var->grayscale) && (var->bits_per_pixel == 1)) + fix->visual = FB_VISUAL_MONO10; + else if (var->bits_per_pixel >= 15) + fix->visual = FB_VISUAL_TRUECOLOR; + else + fix->visual = FB_VISUAL_PSEUDOCOLOR; + + /* TODO: add some more validation here */ + return 0; +} + +/* + * sh7760fb_set_par - set videomode. + * + * NOTE: The rotation, grayscale and DSTN codepaths are + * totally untested! + */ +static int sh7760fb_set_par(struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + struct fb_videomode *vm = par->pd->def_mode; + unsigned long sbase, dstn_off, ldsarl, stride; + unsigned short hsynp, hsynw, htcn, hdcn; + unsigned short vsynp, vsynw, vtln, vdln; + unsigned short lddfr, ldmtr; + int ret, bpp, gray; + + par->rot = par->pd->rotate; + + /* rotate only works with xres <= 320 */ + if (par->rot && (vm->xres > 320)) { + dev_dbg(info->dev, "rotation disabled due to display size\n"); + par->rot = 0; + } + + /* calculate LCDC reg vals from display parameters */ + hsynp = vm->right_margin + vm->xres; + hsynw = vm->hsync_len; + htcn = vm->left_margin + hsynp + hsynw; + hdcn = vm->xres; + vsynp = vm->lower_margin + vm->yres; + vsynw = vm->vsync_len; + vtln = vm->upper_margin + vsynp + vsynw; + vdln = vm->yres; + + /* get color info from register value */ + ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, &gray); + if (ret) + return ret; + + dev_dbg(info->dev, "%dx%d %dbpp %s (orientation %s)\n", hdcn, + vdln, bpp, gray ? "grayscale" : "color", + par->rot ? "rotated" : "normal"); + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + lddfr = par->pd->lddfr | (1 << 8); +#else + lddfr = par->pd->lddfr & ~(1 << 8); +#endif + + ldmtr = par->pd->ldmtr; + + if (!(vm->sync & FB_SYNC_HOR_HIGH_ACT)) + ldmtr |= LDMTR_CL1POL; + if (!(vm->sync & FB_SYNC_VERT_HIGH_ACT)) + ldmtr |= LDMTR_FLMPOL; + + /* shut down LCDC before changing display parameters */ + sh7760fb_blank(FB_BLANK_POWERDOWN, info); + + iowrite16(par->pd->ldickr, par->base + LDICKR); /* pixclock */ + iowrite16(ldmtr, par->base + LDMTR); /* polarities */ + iowrite16(lddfr, par->base + LDDFR); /* color/depth */ + iowrite16((par->rot ? 1 << 13 : 0), par->base + LDSMR); /* rotate */ + iowrite16(par->pd->ldpmmr, par->base + LDPMMR); /* Power Management */ + iowrite16(par->pd->ldpspr, par->base + LDPSPR); /* Power Supply Ctrl */ + + /* display resolution */ + iowrite16(((htcn >> 3) - 1) | (((hdcn >> 3) - 1) << 8), + par->base + LDHCNR); + iowrite16(vdln - 1, par->base + LDVDLNR); + iowrite16(vtln - 1, par->base + LDVTLNR); + /* h/v sync signals */ + iowrite16((vsynp - 1) | ((vsynw - 1) << 12), par->base + LDVSYNR); + iowrite16(((hsynp >> 3) - 1) | (((hsynw >> 3) - 1) << 12), + par->base + LDHSYNR); + /* AC modulation sig */ + iowrite16(par->pd->ldaclnr, par->base + LDACLNR); + + stride = (par->rot) ? vtln : hdcn; + if (!gray) + stride *= (bpp + 7) >> 3; + else { + if (bpp == 1) + stride >>= 3; + else if (bpp == 2) + stride >>= 2; + else if (bpp == 4) + stride >>= 1; + /* 6 bpp == 8 bpp */ + } + + /* if rotated, stride must be power of 2 */ + if (par->rot) { + unsigned long bit = 1 << 31; + while (bit) { + if (stride & bit) + break; + bit >>= 1; + } + if (stride & ~bit) + stride = bit << 1; /* not P-o-2, round up */ + } + iowrite16(stride, par->base + LDLAOR); + + /* set display mem start address */ + sbase = (unsigned long)par->fbdma; + if (par->rot) + sbase += (hdcn - 1) * stride; + + iowrite32(sbase, par->base + LDSARU); + + /* + * for DSTN need to set address for lower half. + * I (mlau) don't know which address to set it to, + * so I guessed at (stride * yres/2). + */ + if (((ldmtr & 0x003f) >= LDMTR_DSTN_MONO_8) && + ((ldmtr & 0x003f) <= LDMTR_DSTN_COLOR_16)) { + + dev_dbg(info->dev, " ***** DSTN untested! *****\n"); + + dstn_off = stride; + if (par->rot) + dstn_off *= hdcn >> 1; + else + dstn_off *= vdln >> 1; + + ldsarl = sbase + dstn_off; + } else + ldsarl = 0; + + iowrite32(ldsarl, par->base + LDSARL); /* mem for lower half of DSTN */ + + encode_fix(&info->fix, info, stride); + sh7760fb_check_var(&info->var, info); + + sh7760fb_blank(FB_BLANK_UNBLANK, info); /* panel on! */ + + dev_dbg(info->dev, "hdcn : %6d htcn : %6d\n", hdcn, htcn); + dev_dbg(info->dev, "hsynw : %6d hsynp : %6d\n", hsynw, hsynp); + dev_dbg(info->dev, "vdln : %6d vtln : %6d\n", vdln, vtln); + dev_dbg(info->dev, "vsynw : %6d vsynp : %6d\n", vsynw, vsynp); + dev_dbg(info->dev, "clksrc: %6d clkdiv: %6d\n", + (par->pd->ldickr >> 12) & 3, par->pd->ldickr & 0x1f); + dev_dbg(info->dev, "ldpmmr: 0x%04x ldpspr: 0x%04x\n", par->pd->ldpmmr, + par->pd->ldpspr); + dev_dbg(info->dev, "ldmtr : 0x%04x lddfr : 0x%04x\n", ldmtr, lddfr); + dev_dbg(info->dev, "ldlaor: %ld\n", stride); + dev_dbg(info->dev, "ldsaru: 0x%08lx ldsarl: 0x%08lx\n", sbase, ldsarl); + + return 0; +} + +static struct fb_ops sh7760fb_ops = { + .owner = THIS_MODULE, + .fb_blank = sh7760fb_blank, + .fb_check_var = sh7760fb_check_var, + .fb_setcmap = sh7760fb_setcmap, + .fb_set_par = sh7760fb_set_par, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +static void sh7760fb_free_mem(struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + + if (!info->screen_base) + return; + + dma_free_coherent(info->dev, info->screen_size, + info->screen_base, par->fbdma); + + par->fbdma = 0; + info->screen_base = NULL; + info->screen_size = 0; +} + +/* allocate the framebuffer memory. This memory must be in Area3, + * (dictated by the DMA engine) and contiguous, at a 512 byte boundary. + */ +static int sh7760fb_alloc_mem(struct fb_info *info) +{ + struct sh7760fb_par *par = info->par; + void *fbmem; + unsigned long vram; + int ret, bpp; + + if (info->screen_base) + return 0; + + /* get color info from register value */ + ret = sh7760fb_get_color_info(info->dev, par->pd->lddfr, &bpp, NULL); + if (ret) { + printk(KERN_ERR "colinfo\n"); + return ret; + } + + /* min VRAM: xres_min = 16, yres_min = 1, bpp = 1: 2byte -> 1 page + max VRAM: xres_max = 1024, yres_max = 1024, bpp = 16: 2MB */ + + vram = info->var.xres * info->var.yres; + if (info->var.grayscale) { + if (bpp == 1) + vram >>= 3; + else if (bpp == 2) + vram >>= 2; + else if (bpp == 4) + vram >>= 1; + } else if (bpp > 8) + vram *= 2; + if ((vram < 1) || (vram > 1024 * 2048)) { + dev_dbg(info->dev, "too much VRAM required. Check settings\n"); + return -ENODEV; + } + + if (vram < PAGE_SIZE) + vram = PAGE_SIZE; + + fbmem = dma_alloc_coherent(info->dev, vram, &par->fbdma, GFP_KERNEL); + + if (!fbmem) + return -ENOMEM; + + if ((par->fbdma & SH7760FB_DMA_MASK) != SH7760FB_DMA_MASK) { + sh7760fb_free_mem(info); + dev_err(info->dev, "kernel gave me memory at 0x%08lx, which is" + "unusable for the LCDC\n", (unsigned long)par->fbdma); + return -ENOMEM; + } + + info->screen_base = fbmem; + info->screen_size = vram; + + return 0; +} + +static int __devinit sh7760fb_probe(struct platform_device *pdev) +{ + struct fb_info *info; + struct resource *res; + struct sh7760fb_par *par; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (unlikely(res == NULL)) { + dev_err(&pdev->dev, "invalid resource\n"); + return -EINVAL; + } + + info = framebuffer_alloc(sizeof(struct sh7760fb_par), &pdev->dev); + if (!info) + return -ENOMEM; + + par = info->par; + par->dev = pdev; + + par->pd = pdev->dev.platform_data; + if (!par->pd) { + dev_dbg(info->dev, "no display setup data!\n"); + ret = -ENODEV; + goto out_fb; + } + + par->ioarea = request_mem_region(res->start, + (res->end - res->start), pdev->name); + if (!par->ioarea) { + dev_err(&pdev->dev, "mmio area busy\n"); + ret = -EBUSY; + goto out_fb; + } + + par->base = ioremap_nocache(res->start, res->end - res->start + 1); + if (!par->base) { + dev_err(&pdev->dev, "cannot remap\n"); + ret = -ENODEV; + goto out_res; + } + + iowrite16(0, par->base + LDINTR); /* disable vsync irq */ + par->irq = platform_get_irq(pdev, 0); + if (par->irq >= 0) { + ret = request_irq(par->irq, sh7760fb_irq, 0, + "sh7760-lcdc", &par->vsync); + if (ret) { + dev_err(&pdev->dev, "cannot grab IRQ\n"); + par->irq = -ENXIO; + } else + disable_irq_nosync(par->irq); + } + + fb_videomode_to_var(&info->var, par->pd->def_mode); + + ret = sh7760fb_alloc_mem(info); + if (ret) { + dev_dbg(info->dev, "framebuffer memory allocation failed!\n"); + goto out_unmap; + } + + info->pseudo_palette = par->pseudo_palette; + + /* fixup color register bitpositions. These are fixed by hardware */ + info->var.red.offset = 11; + info->var.red.length = 5; + info->var.red.msb_right = 0; + + info->var.green.offset = 5; + info->var.green.length = 6; + info->var.green.msb_right = 0; + + info->var.blue.offset = 0; + info->var.blue.length = 5; + info->var.blue.msb_right = 0; + + info->var.transp.offset = 0; + info->var.transp.length = 0; + info->var.transp.msb_right = 0; + + /* set the DON2 bit now, before cmap allocation, as it will randomize + * palette memory. + */ + iowrite16(LDCNTR_DON2, par->base + LDCNTR); + info->fbops = &sh7760fb_ops; + + ret = fb_alloc_cmap(&info->cmap, 256, 0); + if (ret) { + dev_dbg(info->dev, "Unable to allocate cmap memory\n"); + goto out_mem; + } + + ret = register_framebuffer(info); + if (ret < 0) { + dev_dbg(info->dev, "cannot register fb!\n"); + goto out_cmap; + } + platform_set_drvdata(pdev, info); + + printk(KERN_INFO "%s: memory at phys 0x%08lx-0x%08lx, size %ld KiB\n", + pdev->name, + (unsigned long)par->fbdma, + (unsigned long)(par->fbdma + info->screen_size - 1), + info->screen_size >> 10); + + return 0; + +out_cmap: + sh7760fb_blank(FB_BLANK_POWERDOWN, info); + fb_dealloc_cmap(&info->cmap); +out_mem: + sh7760fb_free_mem(info); +out_unmap: + if (par->irq >= 0) + free_irq(par->irq, &par->vsync); + iounmap(par->base); +out_res: + release_resource(par->ioarea); + kfree(par->ioarea); +out_fb: + framebuffer_release(info); + return ret; +} + +static int __devexit sh7760fb_remove(struct platform_device *dev) +{ + struct fb_info *info = platform_get_drvdata(dev); + struct sh7760fb_par *par = info->par; + + sh7760fb_blank(FB_BLANK_POWERDOWN, info); + unregister_framebuffer(info); + fb_dealloc_cmap(&info->cmap); + sh7760fb_free_mem(info); + if (par->irq >= 0) + free_irq(par->irq, par); + iounmap(par->base); + release_resource(par->ioarea); + kfree(par->ioarea); + framebuffer_release(info); + platform_set_drvdata(dev, NULL); + + return 0; +} + +static struct platform_driver sh7760_lcdc_driver = { + .driver = { + .name = "sh7760-lcdc", + .owner = THIS_MODULE, + }, + .probe = sh7760fb_probe, + .remove = __devexit_p(sh7760fb_remove), +}; + +static int __init sh7760fb_init(void) +{ + return platform_driver_register(&sh7760_lcdc_driver); +} + +static void __exit sh7760fb_exit(void) +{ + platform_driver_unregister(&sh7760_lcdc_driver); +} + +module_init(sh7760fb_init); +module_exit(sh7760fb_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu, Manuel Lauss"); +MODULE_DESCRIPTION("FBdev for SH7760/63 integrated LCD Controller"); +MODULE_LICENSE("GPL"); diff --git a/include/asm-sh/sh7760fb.h b/include/asm-sh/sh7760fb.h new file mode 100644 index 000000000000..8767f61aceca --- /dev/null +++ b/include/asm-sh/sh7760fb.h @@ -0,0 +1,197 @@ +/* + * sh7760fb.h -- platform data for SH7760/SH7763 LCDC framebuffer driver. + * + * (c) 2006-2008 MSC Vertriebsges.m.b.H., + * Manuel Lauss + * (c) 2008 Nobuhiro Iwamatsu + */ + +#ifndef _ASM_SH_SH7760FB_H +#define _ASM_SH_SH7760FB_H + +/* + * some bits of the colormap registers should be written as zero. + * create a mask for that. + */ +#define SH7760FB_PALETTE_MASK 0x00f8fcf8 + +/* The LCDC dma engine always sets bits 27-26 to 1: this is Area3 */ +#define SH7760FB_DMA_MASK 0x0C000000 + +/* palette */ +#define LDPR(x) (((x) << 2)) + +/* framebuffer registers and bits */ +#define LDICKR 0x400 +#define LDMTR 0x402 +/* see sh7760fb.h for LDMTR bits */ +#define LDDFR 0x404 +#define LDDFR_PABD (1 << 8) +#define LDDFR_COLOR_MASK 0x7F +#define LDSMR 0x406 +#define LDSMR_ROT (1 << 13) +#define LDSARU 0x408 +#define LDSARL 0x40c +#define LDLAOR 0x410 +#define LDPALCR 0x412 +#define LDPALCR_PALS (1 << 4) +#define LDPALCR_PALEN (1 << 0) +#define LDHCNR 0x414 +#define LDHSYNR 0x416 +#define LDVDLNR 0x418 +#define LDVTLNR 0x41a +#define LDVSYNR 0x41c +#define LDACLNR 0x41e +#define LDINTR 0x420 +#define LDPMMR 0x424 +#define LDPSPR 0x426 +#define LDCNTR 0x428 +#define LDCNTR_DON (1 << 0) +#define LDCNTR_DON2 (1 << 4) + +#ifdef CONFIG_CPU_SUBTYPE_SH7763 +# define LDLIRNR 0x440 +/* LDINTR bit */ +# define LDINTR_MINTEN (1 << 15) +# define LDINTR_FINTEN (1 << 14) +# define LDINTR_VSINTEN (1 << 13) +# define LDINTR_VEINTEN (1 << 12) +# define LDINTR_MINTS (1 << 11) +# define LDINTR_FINTS (1 << 10) +# define LDINTR_VSINTS (1 << 9) +# define LDINTR_VEINTS (1 << 8) +# define VINT_START (LDINTR_VSINTEN) +# define VINT_CHECK (LDINTR_VSINTS) +#else +/* LDINTR bit */ +# define LDINTR_VINTSEL (1 << 12) +# define LDINTR_VINTE (1 << 8) +# define LDINTR_VINTS (1 << 0) +# define VINT_START (LDINTR_VINTSEL) +# define VINT_CHECK (LDINTR_VINTS) +#endif + +/* HSYNC polarity inversion */ +#define LDMTR_FLMPOL (1 << 15) + +/* VSYNC polarity inversion */ +#define LDMTR_CL1POL (1 << 14) + +/* DISPLAY-ENABLE polarity inversion */ +#define LDMTR_DISPEN_LOWACT (1 << 13) + +/* DISPLAY DATA BUS polarity inversion */ +#define LDMTR_DPOL_LOWACT (1 << 12) + +/* AC modulation signal enable */ +#define LDMTR_MCNT (1 << 10) + +/* Disable output of HSYNC during VSYNC period */ +#define LDMTR_CL1CNT (1 << 9) + +/* Disable output of VSYNC during VSYNC period */ +#define LDMTR_CL2CNT (1 << 8) + +/* Display types supported by the LCDC */ +#define LDMTR_STN_MONO_4 0x00 +#define LDMTR_STN_MONO_8 0x01 +#define LDMTR_STN_COLOR_4 0x08 +#define LDMTR_STN_COLOR_8 0x09 +#define LDMTR_STN_COLOR_12 0x0A +#define LDMTR_STN_COLOR_16 0x0B +#define LDMTR_DSTN_MONO_8 0x11 +#define LDMTR_DSTN_MONO_16 0x13 +#define LDMTR_DSTN_COLOR_8 0x19 +#define LDMTR_DSTN_COLOR_12 0x1A +#define LDMTR_DSTN_COLOR_16 0x1B +#define LDMTR_TFT_COLOR_16 0x2B + +/* framebuffer color layout */ +#define LDDFR_1BPP_MONO 0x00 +#define LDDFR_2BPP_MONO 0x01 +#define LDDFR_4BPP_MONO 0x02 +#define LDDFR_6BPP_MONO 0x04 +#define LDDFR_4BPP 0x0A +#define LDDFR_8BPP 0x0C +#define LDDFR_16BPP_RGB555 0x1D +#define LDDFR_16BPP_RGB565 0x2D + +/* LCDC Pixclock sources */ +#define LCDC_CLKSRC_BUSCLOCK 0 +#define LCDC_CLKSRC_PERIPHERAL 1 +#define LCDC_CLKSRC_EXTERNAL 2 + +#define LDICKR_CLKSRC(x) \ + (((x) & 3) << 12) + +/* LCDC pixclock input divider. Set to 1 at a minimum! */ +#define LDICKR_CLKDIV(x) \ + ((x) & 0x1f) + +struct sh7760fb_platdata { + + /* Set this member to a valid fb_videmode for the display you + * wish to use. The following members must be initialized: + * xres, yres, hsync_len, vsync_len, sync, + * {left,right,upper,lower}_margin. + * The driver uses the above members to calculate register values + * and memory requirements. Other members are ignored but may + * be used by other framebuffer layer components. + */ + struct fb_videomode *def_mode; + + /* LDMTR includes display type and signal polarity. The + * HSYNC/VSYNC polarities are derived from the fb_var_screeninfo + * data above; however the polarities of the following signals + * must be encoded in the ldmtr member: + * Display Enable signal (default high-active) DISPEN_LOWACT + * Display Data signals (default high-active) DPOL_LOWACT + * AC Modulation signal (default off) MCNT + * Hsync-During-Vsync suppression (default off) CL1CNT + * Vsync-during-vsync suppression (default off) CL2CNT + * NOTE: also set a display type! + * (one of LDMTR_{STN,DSTN,TFT}_{MONO,COLOR}_{4,8,12,16}) + */ + u16 ldmtr; + + /* LDDFR controls framebuffer image format (depth, organization) + * Use ONE of the LDDFR_?BPP_* macros! + */ + u16 lddfr; + + /* LDPMMR and LDPSPR control the timing of the power signals + * for the display. Please read the SH7760 Hardware Manual, + * Chapters 30.3.17, 30.3.18 and 30.4.6! + */ + u16 ldpmmr; + u16 ldpspr; + + /* LDACLNR contains the line numbers after which the AC modulation + * signal is to toggle. Set to ZERO for TFTs or displays which + * do not need it. (Chapter 30.3.15 in SH7760 Hardware Manual). + */ + u16 ldaclnr; + + /* LDICKR contains information on pixelclock source and config. + * Please use the LDICKR_CLKSRC() and LDICKR_CLKDIV() macros. + * minimal value for CLKDIV() must be 1!. + */ + u16 ldickr; + + /* set this member to 1 if you wish to use the LCDC's hardware + * rotation function. This is limited to displays <= 320x200 + * pixels resolution! + */ + int rotate; /* set to 1 to rotate 90 CCW */ + + /* set this to 1 to suppress vsync irq use. */ + int novsync; + + /* blanking hook for platform. Set this if your platform can do + * more than the LCDC in terms of blanking (e.g. disable clock + * generator / backlight power supply / etc. + */ + void (*blank) (int); +}; + +#endif /* _ASM_SH_SH7760FB_H */ -- cgit v1.2.3