diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/00-INDEX | 4 | ||||
-rw-r--r-- | Documentation/DMA-API-HOWTO.txt | 126 | ||||
-rw-r--r-- | Documentation/DMA-API.txt | 12 | ||||
-rw-r--r-- | Documentation/arm64/memory.txt | 12 | ||||
-rw-r--r-- | Documentation/cgroups/memory.txt | 4 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/arm/atmel-at91.txt | 2 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt | 19 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/net/mdio-gpio.txt | 9 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt | 2 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 16 | ||||
-rw-r--r-- | Documentation/firmware_class/README | 26 | ||||
-rw-r--r-- | Documentation/hwmon/fam15h_power | 2 | ||||
-rw-r--r-- | Documentation/networking/netdev-features.txt | 2 | ||||
-rw-r--r-- | Documentation/networking/vxlan.txt | 4 | ||||
-rw-r--r-- | Documentation/zh_CN/IRQ.txt | 39 | ||||
-rw-r--r-- | Documentation/zh_CN/arm64/booting.txt | 156 | ||||
-rw-r--r-- | Documentation/zh_CN/arm64/memory.txt | 93 |
18 files changed, 501 insertions, 29 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index f54273e2ac97..ceb1ff735469 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -210,6 +210,8 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. +lockup-watchdogs.txt + - info on soft and hard lockup detectors (aka nmi_watchdog). logo.gif - full colour GIF image of Linux logo (penguin - Tux). logo.txt @@ -240,8 +242,6 @@ netlabel/ - directory with information on the NetLabel subsystem. networking/ - directory with info on various aspects of networking with Linux. -nmi_watchdog.txt - - info on NMI watchdog for SMP systems. nommu-mmap.txt - documentation about no-mmu memory mapping support. numastat.txt diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index a0b6250add79..4a4fb295ceef 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -468,11 +468,46 @@ To map a single region, you do: size_t size = buffer->len; dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } and to unmap it: dma_unmap_single(dev, dma_handle, size, direction); +You should call dma_mapping_error() as dma_map_single() could fail and return +error. Not all dma implementations support dma_mapping_error() interface. +However, it is a good practice to call dma_mapping_error() interface, which +will invoke the generic mapping error check interface. Doing so will ensure +that the mapping code will work correctly on all dma implementations without +any dependency on the specifics of the underlying implementation. Using the +returned address without checking for errors could result in failures ranging +from panics to silent data corruption. Couple of example of incorrect ways to +check for errors that make assumptions about the underlying dma implementation +are as follows and these are applicable to dma_map_page() as well. + +Incorrect example 1: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) { + goto map_error; + } + +Incorrect example 2: + dma_addr_t dma_handle; + + dma_handle = dma_map_single(dev, addr, size, direction); + if (dma_handle == DMA_ERROR_CODE) { + goto map_error; + } + You should call dma_unmap_single when the DMA activity is finished, e.g. from the interrupt which told you that the DMA transfer is done. @@ -489,6 +524,14 @@ Specifically: size_t size = buffer->len; dma_handle = dma_map_page(dev, page, offset, size, direction); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } ... @@ -496,6 +539,12 @@ Specifically: Here, "offset" means byte offset within the given page. +You should call dma_mapping_error() as dma_map_page() could fail and return +error as outlined under the dma_map_single() discussion. + +You should call dma_unmap_page when the DMA activity is finished, e.g. +from the interrupt which told you that the DMA transfer is done. + With scatterlists, you map a region gathered from several regions by: int i, count = dma_map_sg(dev, sglist, nents, direction); @@ -578,6 +627,14 @@ to use the dma_sync_*() interfaces. dma_addr_t mapping; mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_handle)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } cp->rx_buf = buffer; cp->rx_len = len; @@ -658,6 +715,75 @@ failure can be determined by: * delay and try again later or * reset driver. */ + goto map_error_handling; + } + +- unmap pages that are already mapped, when mapping error occurs in the middle + of a multiple page mapping attempt. These example are applicable to + dma_map_page() as well. + +Example 1: + dma_addr_t dma_handle1; + dma_addr_t dma_handle2; + + dma_handle1 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle1)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling1; + } + dma_handle2 = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_handle2)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling2; + } + + ... + + map_error_handling2: + dma_unmap_single(dma_handle1); + map_error_handling1: + +Example 2: (if buffers are allocated a loop, unmap all mapped buffers when + mapping error is detected in the middle) + + dma_addr_t dma_addr; + dma_addr_t array[DMA_BUFFERS]; + int save_index = 0; + + for (i = 0; i < DMA_BUFFERS; i++) { + + ... + + dma_addr = dma_map_single(dev, addr, size, direction); + if (dma_mapping_error(dev, dma_addr)) { + /* + * reduce current DMA mapping usage, + * delay and try again later or + * reset driver. + */ + goto map_error_handling; + } + array[i].dma_addr = dma_addr; + save_index++; + } + + ... + + map_error_handling: + + for (i = 0; i < save_index; i++) { + + ... + + dma_unmap_single(array[i].dma_addr); } Networking drivers must call dev_kfree_skb to free the socket buffer diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 66bd97a95f10..78a6c569d204 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number of preallocated entries is defined per architecture. If it is too low for you boot with 'dma_debug_entries=<your_desired_number>' to overwrite the architectural default. + +void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr); + +dma-debug interface debug_dma_mapping_error() to debug drivers that fail +to check dma mapping errors on addresses returned by dma_map_single() and +dma_map_page() interfaces. This interface clears a flag set by +debug_dma_map_page() to indicate that dma_mapping_error() has been called by +the driver. When driver does unmap, debug_dma_unmap() checks the flag and if +this flag is still set, prints warning message that includes call trace that +leads up to the unmap. This interface can be called from dma_mapping_error() +routines to enable dma mapping error check debugging. + diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index dbbdcbba75a3..4110cca96bd6 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -27,17 +27,17 @@ Start End Size Use ----------------------------------------------------------------------- 0000000000000000 0000007fffffffff 512GB user -ffffff8000000000 ffffffbbfffcffff ~240GB vmalloc +ffffff8000000000 ffffffbbfffeffff ~240GB vmalloc -ffffffbbfffd0000 ffffffbcfffdffff 64KB [guard page] +ffffffbbffff0000 ffffffbbffffffff 64KB [guard page] -ffffffbbfffe0000 ffffffbcfffeffff 64KB PCI I/O space +ffffffbc00000000 ffffffbdffffffff 8GB vmemmap -ffffffbbffff0000 ffffffbcffffffff 64KB [guard page] +ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmmemap] -ffffffbc00000000 ffffffbdffffffff 8GB vmemmap +ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space -ffffffbe00000000 ffffffbffbffffff ~8GB [guard, future vmmemap] +ffffffbbffff0000 ffffffbcffffffff ~2MB [guard] ffffffbffc000000 ffffffbfffffffff 64MB modules diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index c07f7b4fb88d..71c4da413444 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -466,6 +466,10 @@ Note: 5.3 swappiness Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. +Please note that unlike the global swappiness, memcg knob set to 0 +really prevents from any swapping even if there is a swap storage +available. This might lead to memcg OOM killer if there are no file +pages to reclaim. Following cgroups' swappiness can't be changed. - root cgroup (uses /proc/sys/vm/swappiness). diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt index ecc81e368715..d187e9f7cf1c 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.txt +++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt @@ -8,7 +8,7 @@ PIT Timer required properties: shared across all System Controller members. TC/TCLIB Timer required properties: -- compatible: Should be "atmel,<chip>-pit". +- compatible: Should be "atmel,<chip>-tcb". <chip> can be "at91rm9200" or "at91sam9x5" - reg: Should contain registers location and length - interrupts: Should contain all interrupts for the TC block diff --git a/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt new file mode 100644 index 000000000000..df70318a617f --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt @@ -0,0 +1,19 @@ +* EETI eGalax Multiple Touch Controller + +Required properties: +- compatible: must be "eeti,egalax_ts" +- reg: i2c slave address +- interrupt-parent: the phandle for the interrupt controller +- interrupts: touch controller interrupt +- wakeup-gpios: the gpio pin to be used for waking up the controller + as well as uased as irq pin + +Example: + + egalax_ts@04 { + compatible = "eeti,egalax_ts"; + reg = <0x04>; + interrupt-parent = <&gpio1>; + interrupts = <9 2>; + wakeup-gpios = <&gpio1 9 0>; + }; diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt index bc9549529014..c79bab025369 100644 --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt @@ -8,9 +8,16 @@ gpios property as described in section VIII.1 in the following order: MDC, MDIO. +Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases" +node. + Example: -mdio { +aliases { + mdio-gpio0 = <&mdio0>; +}; + +mdio0: mdio { compatible = "virtual,mdio-gpio"; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt index c8e578263ce2..683fde93c4fb 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt @@ -93,7 +93,7 @@ Valid values for pin and group names are: With some exceptions, these support nvidia,high-speed-mode, nvidia,schmitt, nvidia,low-power-mode, nvidia,pull-down-strength, - nvidia,pull-up-strength, nvidia,slew_rate-rising, nvidia,slew_rate-falling. + nvidia,pull-up-strength, nvidia,slew-rate-rising, nvidia,slew-rate-falling. drive_ao1, drive_ao2, drive_at1, drive_at2, drive_cdev1, drive_cdev2, drive_csus, drive_dap1, drive_dap2, drive_dap3, drive_dap4, drive_dbg, diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt index c275b70349c1..6f426ed7009e 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt @@ -83,7 +83,7 @@ Valid values for pin and group names are: drive groups: These all support nvidia,pull-down-strength, nvidia,pull-up-strength, - nvidia,slew_rate-rising, nvidia,slew_rate-falling. Most but not all + nvidia,slew-rate-rising, nvidia,slew-rate-falling. Most but not all support nvidia,high-speed-mode, nvidia,schmitt, nvidia,low-power-mode. ao1, ao2, at1, at2, at3, at4, at5, cdev1, cdev2, cec, crt, csus, dap1, diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a1793d670cd0..3844d21d6ca3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer + 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer score 3.2 /proc/<pid>/oom_score - Display current oom-killer score 3.3 /proc/<pid>/io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score +3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -This file can be used to adjust the badness heuristic used to select which +These file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. +For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also +be used to tune the badness score. Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. Its value is +scaled linearly with /proc/<pid>/oom_score_adj. + The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. @@ -1375,7 +1381,9 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given <pid>. +any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which +process should be killed in an out-of-memory situation. + 3.3 /proc/<pid>/io - Display the IO accounting fields ------------------------------------------------------- diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README index 7eceaff63f5f..815b711bcd85 100644 --- a/Documentation/firmware_class/README +++ b/Documentation/firmware_class/README @@ -18,32 +18,40 @@ High level behavior (mixed): ============================ - kernel(driver): calls request_firmware(&fw_entry, $FIRMWARE, device) - - userspace: + 1), kernel(driver): + - calls request_firmware(&fw_entry, $FIRMWARE, device) + - kernel searchs the fimware image with name $FIRMWARE directly + in the below search path of root filesystem: + "/lib/firmware/updates/" UTS_RELEASE, + "/lib/firmware/updates", + "/lib/firmware/" UTS_RELEASE, + "/lib/firmware" + - If found, goto 7), else goto 2) + + 2), userspace: - /sys/class/firmware/xxx/{loading,data} appear. - hotplug gets called with a firmware identifier in $FIRMWARE and the usual hotplug environment. - hotplug: echo 1 > /sys/class/firmware/xxx/loading - kernel: Discard any previous partial load. + 3), kernel: Discard any previous partial load. - userspace: + 4), userspace: - hotplug: cat appropriate_firmware_image > \ /sys/class/firmware/xxx/data - kernel: grows a buffer in PAGE_SIZE increments to hold the image as it + 5), kernel: grows a buffer in PAGE_SIZE increments to hold the image as it comes in. - userspace: + 6), userspace: - hotplug: echo 0 > /sys/class/firmware/xxx/loading - kernel: request_firmware() returns and the driver has the firmware + 7), kernel: request_firmware() returns and the driver has the firmware image in fw_entry->{data,size}. If something went wrong request_firmware() returns non-zero and fw_entry is set to NULL. - kernel(driver): Driver code calls release_firmware(fw_entry) releasing + 8), kernel(driver): Driver code calls release_firmware(fw_entry) releasing the firmware image and any related resource. High level behavior (driver code): diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power index a92918e0bd69..80654813d04a 100644 --- a/Documentation/hwmon/fam15h_power +++ b/Documentation/hwmon/fam15h_power @@ -10,7 +10,7 @@ Supported chips: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors (not yet published) -Author: Andreas Herrmann <andreas.herrmann3@amd.com> +Author: Andreas Herrmann <herrmann.der.user@googlemail.com> Description ----------- diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index 4164f5c02e4b..f310edec8a77 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -164,4 +164,4 @@ read the CRC recorded by the NIC on receipt of the packet. This requests that the NIC receive all possible frames, including errored frames (such as bad FCS, etc). This can be helpful when sniffing a link with bad packets on it. Some NICs may receive more packets if also put into normal -PROMISC mdoe. +PROMISC mode. diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt index 5b34b762d7d5..6d993510f091 100644 --- a/Documentation/networking/vxlan.txt +++ b/Documentation/networking/vxlan.txt @@ -32,7 +32,7 @@ no entry is in the forwarding table. # ip link delete vxlan0 3. Show vxlan info - # ip -d show vxlan0 + # ip -d link show vxlan0 It is possible to create, destroy and display the vxlan forwarding table using the new bridge command. @@ -41,7 +41,7 @@ forwarding table using the new bridge command. # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0 2. Delete forwarding table entry - # bridge fdb delete 00:17:42:8a:b4:05 + # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0 3. Show forwarding table # bridge fdb show dev vxlan0 diff --git a/Documentation/zh_CN/IRQ.txt b/Documentation/zh_CN/IRQ.txt new file mode 100644 index 000000000000..956026d5cf82 --- /dev/null +++ b/Documentation/zh_CN/IRQ.txt @@ -0,0 +1,39 @@ +Chinese translated version of Documentation/IRQ.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Eric W. Biederman <ebiederman@xmission.com> +Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> +--------------------------------------------------------------------- +Documentation/IRQ.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 +英文版维护者: Eric W. Biederman <ebiederman@xmission.com> +中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> + + +以下为正文 +--------------------------------------------------------------------- +何为 IRQ? + +一个 IRQ 是来自某个设备的一个中断请求。目前,它们可以来自一个硬件引脚, +或来自一个数据包。多个设备可能连接到同个硬件引脚,从而共享一个 IRQ。 + +一个 IRQ 编号是用于告知硬件中断源的内核标识。通常情况下,这是一个 +全局 irq_desc 数组的索引,但是除了在 linux/interrupt.h 中的实现, +具体的细节是体系结构特定的。 + +一个 IRQ 编号是设备上某个可能的中断源的枚举。通常情况下,枚举的编号是 +该引脚在系统内中断控制器的所有输入引脚中的编号。对于 ISA 总线中的情况, +枚举的是在两个 i8259 中断控制器中 16 个输入引脚。 + +架构可以对 IRQ 编号指定额外的含义,在硬件涉及任何手工配置的情况下, +是被提倡的。ISA 的 IRQ 是一个分配这类额外含义的典型例子。 diff --git a/Documentation/zh_CN/arm64/booting.txt b/Documentation/zh_CN/arm64/booting.txt new file mode 100644 index 000000000000..28fa325b7461 --- /dev/null +++ b/Documentation/zh_CN/arm64/booting.txt @@ -0,0 +1,156 @@ +Chinese translated version of Documentation/arm64/booting.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Will Deacon <will.deacon@arm.com> +Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> +--------------------------------------------------------------------- +Documentation/arm64/booting.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 + +英文版维护者: Will Deacon <will.deacon@arm.com> +中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> + +以下为正文 +--------------------------------------------------------------------- + 启动 AArch64 Linux + ================== + +作者: Will Deacon <will.deacon@arm.com> +日期: 2012 年 09 月 07 日 + +本文档基于 Russell King 的 ARM 启动文档,且适用于所有公开发布的 +AArch64 Linux 内核代码。 + +AArch64 异常模型由多个异常级别(EL0 - EL3)组成,对于 EL0 和 EL1 +异常级有对应的安全和非安全模式。EL2 是系统管理级,且仅存在于 +非安全模式下。EL3 是最高特权级,且仅存在于安全模式下。 + +基于本文档的目的,我们将简单地使用‘引导装载程序’(‘boot loader’) +这个术语来定义在将控制权交给 Linux 内核前 CPU 上执行的所有软件。 +这可能包含安全监控和系统管理代码,或者它可能只是一些用于准备最小启动 +环境的指令。 + +基本上,引导装载程序(至少)应实现以下操作: + +1、设置和初始化 RAM +2、设置设备树数据 +3、解压内核映像 +4、调用内核映像 + + +1、设置和初始化 RAM +----------------- + +必要性: 强制 + +引导装载程序应该找到并初始化系统中所有内核用于保持系统变量数据的 RAM。 +这个操作的执行是设备依赖的。(它可能使用内部算法来自动定位和计算所有 +RAM,或可能使用对这个设备已知的 RAM 信息,还可能使用任何引导装载程序 +设计者想到的匹配方法。) + + +2、设置设备树数据 +--------------- + +必要性: 强制 + +设备树数据块(dtb)大小必须不大于 2 MB,且位于从内核映像起始算起第一个 +512MB 内的 2MB 边界上。这使得内核可以通过初始页表中的单个节描述符来 +映射此数据块。 + + +3、解压内核映像 +------------- + +必要性: 可选 + +AArch64 内核当前没有提供自解压代码,因此如果使用了压缩内核映像文件 +(比如 Image.gz),则需要通过引导装载程序(使用 gzip 等)来进行解压。 +若引导装载程序没有实现这个需求,就要使用非压缩内核映像文件。 + + +4、调用内核映像 +------------- + +必要性: 强制 + +已解压的内核映像包含一个 32 字节的头,内容如下: + + u32 magic = 0x14000008; /* 跳转到 stext, 小端 */ + u32 res0 = 0; /* 保留 */ + u64 text_offset; /* 映像装载偏移 */ + u64 res1 = 0; /* 保留 */ + u64 res2 = 0; /* 保留 */ + +映像必须位于系统 RAM 起始处的特定偏移(当前是 0x80000)。系统 RAM +的起始地址必须是以 2MB 对齐的。 + +在跳转入内核前,必须符合以下状态: + +- 停止所有 DMA 设备,这样内存数据就不会因为虚假网络包或磁盘数据而 + 被破坏。这可能可以节省你许多的调试时间。 + +- 主 CPU 通用寄存器设置 + x0 = 系统 RAM 中设备树数据块(dtb)的物理地址。 + x1 = 0 (保留,将来可能使用) + x2 = 0 (保留,将来可能使用) + x3 = 0 (保留,将来可能使用) + +- CPU 模式 + 所有形式的中断必须在 PSTATE.DAIF 中被屏蔽(Debug、SError、IRQ + 和 FIQ)。 + CPU 必须处于 EL2(推荐,可访问虚拟化扩展)或非安全 EL1 模式下。 + +- 高速缓存、MMU + MMU 必须关闭。 + 指令缓存开启或关闭都可以。 + 数据缓存必须关闭且无效。 + 外部高速缓存(如果存在)必须配置并禁用。 + +- 架构计时器 + CNTFRQ 必须设定为计时器的频率。 + 如果在 EL1 模式下进入内核,则 CNTHCTL_EL2 中的 EL1PCTEN (bit 0) + 必须置位。 + +- 一致性 + 通过内核启动的所有 CPU 在内核入口地址上必须处于相同的一致性域中。 + 这可能要根据具体实现来定义初始化过程,以使能每个CPU上对维护操作的 + 接收。 + +- 系统寄存器 + 在进入内核映像的异常级中,所有构架中可写的系统寄存器必须通过软件 + 在一个更高的异常级别下初始化,以防止在 未知 状态下运行。 + +引导装载程序必须在每个 CPU 处于以下状态时跳入内核入口: + +- 主 CPU 必须直接跳入内核映像的第一条指令。通过此 CPU 传递的设备树 + 数据块必须在每个 CPU 节点中包含以下内容: + + 1、‘enable-method’属性。目前,此字段支持的值仅为字符串“spin-table”。 + + 2、‘cpu-release-addr’标识一个 64-bit、初始化为零的内存位置。 + + 引导装载程序必须生成这些设备树属性,并在跳入内核入口之前将其插入 + 数据块。 + +- 任何辅助 CPU 必须在内存保留区(通过设备树中的 /memreserve/ 域传递 + 给内核)中自旋于内核之外,轮询它们的 cpu-release-addr 位置(必须 + 包含在保留区中)。可通过插入 wfe 指令来降低忙循环开销,而主 CPU 将 + 发出 sev 指令。当对 cpu-release-addr 所指位置的读取操作返回非零值 + 时,CPU 必须直接跳入此值所指向的地址。 + +- 辅助 CPU 通用寄存器设置 + x0 = 0 (保留,将来可能使用) + x1 = 0 (保留,将来可能使用) + x2 = 0 (保留,将来可能使用) + x3 = 0 (保留,将来可能使用) diff --git a/Documentation/zh_CN/arm64/memory.txt b/Documentation/zh_CN/arm64/memory.txt new file mode 100644 index 000000000000..83b519314706 --- /dev/null +++ b/Documentation/zh_CN/arm64/memory.txt @@ -0,0 +1,93 @@ +Chinese translated version of Documentation/arm64/memory.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Maintainer: Catalin Marinas <catalin.marinas@arm.com> +Chinese maintainer: Fu Wei <tekkamanninja@gmail.com> +--------------------------------------------------------------------- +Documentation/arm64/memory.txt 的中文翻译 + +如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 +交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 +译存在问题,请联系中文版维护者。 + +英文版维护者: Catalin Marinas <catalin.marinas@arm.com> +中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> +中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com> + +以下为正文 +--------------------------------------------------------------------- + Linux 在 AArch64 中的内存布局 + =========================== + +作者: Catalin Marinas <catalin.marinas@arm.com> +日期: 2012 年 02 月 20 日 + +本文档描述 AArch64 Linux 内核所使用的虚拟内存布局。此构架可以实现 +页大小为 4KB 的 4 级转换表和页大小为 64KB 的 3 级转换表。 + +AArch64 Linux 使用页大小为 4KB 的 3 级转换表配置,对于用户和内核 +都有 39-bit (512GB) 的虚拟地址空间。对于页大小为 64KB的配置,仅 +使用 2 级转换表,但内存布局相同。 + +用户地址空间的 63:39 位为 0,而内核地址空间的相应位为 1。TTBRx 的 +选择由虚拟地址的 63 位给出。swapper_pg_dir 仅包含内核(全局)映射, +而用户 pgd 仅包含用户(非全局)映射。swapper_pgd_dir 地址被写入 +TTBR1 中,且从不写入 TTBR0。 + + +AArch64 Linux 内存布局: + +起始地址 结束地址 大小 用途 +----------------------------------------------------------------------- +0000000000000000 0000007fffffffff 512GB 用户空间 + +ffffff8000000000 ffffffbbfffcffff ~240GB vmalloc + +ffffffbbfffd0000 ffffffbcfffdffff 64KB [防护页] + +ffffffbbfffe0000 ffffffbcfffeffff 64KB PCI I/O 空间 + +ffffffbbffff0000 ffffffbcffffffff 64KB [防护页] + +ffffffbc00000000 ffffffbdffffffff 8GB vmemmap + +ffffffbe00000000 ffffffbffbffffff ~8GB [防护页,未来用于 vmmemap] + +ffffffbffc000000 ffffffbfffffffff 64MB 模块 + +ffffffc000000000 ffffffffffffffff 256GB 内存空间 + + +4KB 页大小的转换表查找: + ++--------+--------+--------+--------+--------+--------+--------+--------+ +|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| ++--------+--------+--------+--------+--------+--------+--------+--------+ + | | | | | | + | | | | | v + | | | | | [11:0] 页内偏移 + | | | | +-> [20:12] L3 索引 + | | | +-----------> [29:21] L2 索引 + | | +---------------------> [38:30] L1 索引 + | +-------------------------------> [47:39] L0 索引 (未使用) + +-------------------------------------------------> [63] TTBR0/1 + + +64KB 页大小的转换表查找: + ++--------+--------+--------+--------+--------+--------+--------+--------+ +|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| ++--------+--------+--------+--------+--------+--------+--------+--------+ + | | | | | + | | | | v + | | | | [15:0] 页内偏移 + | | | +----------> [28:16] L3 索引 + | | +--------------------------> [41:29] L2 索引 (仅使用 38:29 ) + | +-------------------------------> [47:42] L1 索引 (未使用) + +-------------------------------------------------> [63] TTBR0/1 |